Ruby 4.1.0dev (2026-01-08 revision dcfbbdc38c2c0502f2eeb9172d1a82721b5ca45b)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/** Value used as the size of a tab character when measuring whitespace. */
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum and maximum of two values. Each argument may be evaluated more than
// once, so do not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Helpful AST-related macros */
23/******************************************************************************/
24
/** Short alias for PM_NODE_FLAGS. */
#define FL PM_NODE_FLAGS

/** Short alias for PM_NODE_UPCAST. */
#define UP PM_NODE_UPCAST

/** The start and end members of the token that token_ points to. */
#define PM_TOKEN_START(token_) ((token_)->start)
#define PM_TOKEN_END(token_) ((token_)->end)

/** The start and end of the location of a node, read through UP(). */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_END(node_) (UP(node_)->location.end)

/** A location whose start and end are both the parser's start member. */
#define PM_LOCATION_NULL_VALUE(parser_) \
    ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })

/** A location spanning the token that token_ points to. */
#define PM_LOCATION_TOKEN_VALUE(token_) \
    ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) })

/** A location spanning the given node. */
#define PM_LOCATION_NODE_VALUE(node_) \
    ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) })

/**
 * A location spanning the given token, or a zeroed location when the token's
 * type is PM_TOKEN_NOT_PROVIDED.
 */
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token))
38
39/******************************************************************************/
40/* Lex mode manipulations */
41/******************************************************************************/
42
/**
 * Return the byte that increases nesting for a literal opened with the given
 * delimiter. For the bracket-like openers '(', '[', '{' and '<' that is the
 * opener itself; any other delimiter yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool nests = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return nests ? start : (uint8_t) '\0';
}
59
/**
 * Return the byte that closes a literal opened with the given delimiter. The
 * bracket-like openers map to their matching closers; every other delimiter
 * closes itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
79
85static bool
86lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
87 lex_mode.prev = parser->lex_modes.current;
88 parser->lex_modes.index++;
89
90 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
92 if (parser->lex_modes.current == NULL) return false;
93
94 *parser->lex_modes.current = lex_mode;
95 } else {
96 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
97 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
98 }
99
100 return true;
101}
102
106static inline bool
107lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
108 uint8_t incrementor = lex_mode_incrementor(delimiter);
109 uint8_t terminator = lex_mode_terminator(delimiter);
110
111 pm_lex_mode_t lex_mode = {
112 .mode = PM_LEX_LIST,
113 .as.list = {
114 .nesting = 0,
115 .interpolation = interpolation,
116 .incrementor = incrementor,
117 .terminator = terminator
118 }
119 };
120
121 // These are the places where we need to split up the content of the list.
122 // We'll use strpbrk to find the first of these characters.
123 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
124 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
125 size_t index = 7;
126
127 // Now we'll add the terminator to the list of breakpoints. If the
128 // terminator is not already a NULL byte, add it to the list.
129 if (terminator != '\0') {
130 breakpoints[index++] = terminator;
131 }
132
133 // If interpolation is allowed, then we're going to check for the #
134 // character. Otherwise we'll only look for escapes and the terminator.
135 if (interpolation) {
136 breakpoints[index++] = '#';
137 }
138
139 // If there is an incrementor, then we'll check for that as well.
140 if (incrementor != '\0') {
141 breakpoints[index++] = incrementor;
142 }
143
144 parser->explicit_encoding = NULL;
145 return lex_mode_push(parser, lex_mode);
146}
147
153static inline bool
154lex_mode_push_list_eof(pm_parser_t *parser) {
155 return lex_mode_push_list(parser, false, '\0');
156}
157
161static inline bool
162lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
163 pm_lex_mode_t lex_mode = {
164 .mode = PM_LEX_REGEXP,
165 .as.regexp = {
166 .nesting = 0,
167 .incrementor = incrementor,
168 .terminator = terminator
169 }
170 };
171
172 // These are the places where we need to split up the content of the
173 // regular expression. We'll use strpbrk to find the first of these
174 // characters.
175 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
176 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
177 size_t index = 4;
178
179 // First we'll add the terminator.
180 if (terminator != '\0') {
181 breakpoints[index++] = terminator;
182 }
183
184 // Next, if there is an incrementor, then we'll check for that as well.
185 if (incrementor != '\0') {
186 breakpoints[index++] = incrementor;
187 }
188
189 parser->explicit_encoding = NULL;
190 return lex_mode_push(parser, lex_mode);
191}
192
196static inline bool
197lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
198 pm_lex_mode_t lex_mode = {
199 .mode = PM_LEX_STRING,
200 .as.string = {
201 .nesting = 0,
202 .interpolation = interpolation,
203 .label_allowed = label_allowed,
204 .incrementor = incrementor,
205 .terminator = terminator
206 }
207 };
208
209 // These are the places where we need to split up the content of the
210 // string. We'll use strpbrk to find the first of these characters.
211 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
212 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
213 size_t index = 3;
214
215 // Now add in the terminator. If the terminator is not already a NULL byte,
216 // then we'll add it.
217 if (terminator != '\0') {
218 breakpoints[index++] = terminator;
219 }
220
221 // If interpolation is allowed, then we're going to check for the #
222 // character. Otherwise we'll only look for escapes and the terminator.
223 if (interpolation) {
224 breakpoints[index++] = '#';
225 }
226
227 // If we have an incrementor, then we'll add that in as a breakpoint as
228 // well.
229 if (incrementor != '\0') {
230 breakpoints[index++] = incrementor;
231 }
232
233 parser->explicit_encoding = NULL;
234 return lex_mode_push(parser, lex_mode);
235}
236
242static inline bool
243lex_mode_push_string_eof(pm_parser_t *parser) {
244 return lex_mode_push_string(parser, false, false, '\0', '\0');
245}
246
252static void
253lex_mode_pop(pm_parser_t *parser) {
254 if (parser->lex_modes.index == 0) {
255 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
256 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
257 parser->lex_modes.index--;
258 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
259 } else {
260 parser->lex_modes.index--;
261 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
262 xfree(parser->lex_modes.current);
263 parser->lex_modes.current = prev;
264 }
265}
266
270static inline bool
271lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
272 return parser->lex_state & state;
273}
274
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the other two cases applies. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** Returned in BEG, CLASS, FNAME or DOT states that are not LABELED. */
    PM_IGNORED_NEWLINE_ALL,

    /** Returned when the state, ignoring LABEL, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
280
281static inline pm_ignored_newline_type_t
282lex_state_ignored_p(pm_parser_t *parser) {
283 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
284
285 if (ignored) {
286 return PM_IGNORED_NEWLINE_ALL;
287 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
288 return PM_IGNORED_NEWLINE_PATTERN;
289 } else {
290 return PM_IGNORED_NEWLINE_NONE;
291 }
292}
293
294static inline bool
295lex_state_beg_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
297}
298
299static inline bool
300lex_state_arg_p(pm_parser_t *parser) {
301 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
302}
303
304static inline bool
305lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
306 if (parser->current.end >= parser->end) {
307 return false;
308 }
309 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
310}
311
312static inline bool
313lex_state_end_p(pm_parser_t *parser) {
314 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
315}
316
320static inline bool
321lex_state_operator_p(pm_parser_t *parser) {
322 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
323}
324
329static inline void
330lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
331 parser->lex_state = state;
332}
333
#ifndef PM_DEBUG_LOGGING
/**
 * Debug logging is off unless PM_DEBUG_LOGGING is defined to a non-zero value
 * before this point.
 */
#define PM_DEBUG_LOGGING 0
#endif

#if PM_DEBUG_LOGGING
/**
 * Print the parser's current lex state to stderr as a '|'-separated list of
 * the names of the set flags, or "NONE" when the state equals
 * PM_LEX_STATE_NONE.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Empty before the first flag is printed, "|" afterwards.
    const char *separator = "";

#define PRINT_IF_SET(flag) \
    if (parser->lex_state & flag) { \
        fprintf(stderr, "%s%s", separator, #flag); \
        separator = "|"; \
    }

    PRINT_IF_SET(PM_LEX_STATE_BEG)
    PRINT_IF_SET(PM_LEX_STATE_END)
    PRINT_IF_SET(PM_LEX_STATE_ENDARG)
    PRINT_IF_SET(PM_LEX_STATE_ENDFN)
    PRINT_IF_SET(PM_LEX_STATE_ARG)
    PRINT_IF_SET(PM_LEX_STATE_CMDARG)
    PRINT_IF_SET(PM_LEX_STATE_MID)
    PRINT_IF_SET(PM_LEX_STATE_FNAME)
    PRINT_IF_SET(PM_LEX_STATE_DOT)
    PRINT_IF_SET(PM_LEX_STATE_CLASS)
    PRINT_IF_SET(PM_LEX_STATE_LABEL)
    PRINT_IF_SET(PM_LEX_STATE_LABELED)
    PRINT_IF_SET(PM_LEX_STATE_FITEM)

#undef PRINT_IF_SET

    fprintf(stderr, "\n");
}

/**
 * Set the lex state, logging the caller and the state before and after the
 * change to stderr.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    // The macro below is not defined yet, so this calls the real function.
    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

// From here on, every lex_state_set call is routed through the logging
// wrapper together with the calling function and line.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
391
392/******************************************************************************/
393/* Command-line macro helpers */
394/******************************************************************************/
395
/** Non-zero if any bit of option is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero if PM_OPTIONS_COMMAND_LINE_A is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if PM_OPTIONS_COMMAND_LINE_E is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if PM_OPTIONS_COMMAND_LINE_L is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if PM_OPTIONS_COMMAND_LINE_N is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if PM_OPTIONS_COMMAND_LINE_P is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if PM_OPTIONS_COMMAND_LINE_X is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
416
417/******************************************************************************/
418/* Diagnostic-related functions */
419/******************************************************************************/
420
424static inline void
425pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
426 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
427}
428
/**
 * Append a formatted error covering the bytes from start to end to the
 * parser's error list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format. Every named parameter is parenthesized so
 * that any pointer-valued expression can be passed as the parser.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, (start), (end), (diag_id), __VA_ARGS__)
434
439static inline void
440pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
441 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
442}
443
/**
 * Append a formatted error covering the location that location points to.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, (location)->start, (location)->end, diag_id, __VA_ARGS__ \
    )
450
455static inline void
456pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
457 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
458}
459
/**
 * Append a formatted error covering the location of the node that node
 * points to.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__ \
    )

/**
 * Append a formatted error covering the node's location, passing the length
 * of that span (as an int) and a pointer to its first byte as the format
 * arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, node, diag_id, \
        (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start \
    )
473
478static inline void
479pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
480 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
481}
482
487static inline void
488pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
489 pm_parser_err(parser, token->start, token->end, diag_id);
490}
491
/**
 * Append a formatted error covering the given token. The token is accessed
 * with '.', so it must be passed as a value rather than as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, (token).start, (token).end, diag_id, __VA_ARGS__ \
    )

/**
 * Append a formatted error covering the given token, passing the token's
 * length (as an int) and a pointer to its first byte as the format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, token, diag_id, \
        (int) ((token).end - (token).start), (const char *) (token).start \
    )
505
509static inline void
510pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
511 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
512}
513
518static inline void
519pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
520 pm_parser_warn(parser, token->start, token->end, diag_id);
521}
522
527static inline void
528pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
529 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
530}
531
/**
 * Append a formatted warning covering the bytes from start to end to the
 * parser's warning list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format. Every named parameter is parenthesized so
 * that any pointer-valued expression can be passed as the parser.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, (start), (end), (diag_id), __VA_ARGS__)
537
/**
 * Append a formatted warning covering the given token. The token is accessed
 * with '.', so it must be passed as a value rather than as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, (token).start, (token).end, diag_id, __VA_ARGS__ \
    )

/**
 * Append a formatted warning covering the given token, passing the token's
 * length (as an int) and a pointer to its first byte as the format arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser, token, diag_id, \
        (int) ((token).end - (token).start), (const char *) (token).start \
    )

/**
 * Append a formatted warning covering the location of the node that node
 * points to.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__ \
    )
558
564static void
565pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
566 PM_PARSER_ERR_FORMAT(
567 parser,
568 ident_start,
569 ident_start + ident_length,
570 PM_ERR_HEREDOC_TERM,
571 (int) ident_length,
572 (const char *) ident_start
573 );
574}
575
576/******************************************************************************/
577/* Scope-related functions */
578/******************************************************************************/
579
583static bool
584pm_parser_scope_push(pm_parser_t *parser, bool closed) {
585 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
586 if (scope == NULL) return false;
587
588 *scope = (pm_scope_t) {
589 .previous = parser->current_scope,
590 .locals = { 0 },
591 .parameters = PM_SCOPE_PARAMETERS_NONE,
592 .implicit_parameters = { 0 },
593 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
594 .closed = closed
595 };
596
597 parser->current_scope = scope;
598 return true;
599}
600
605static bool
606pm_parser_scope_toplevel_p(pm_parser_t *parser) {
607 pm_scope_t *scope = parser->current_scope;
608
609 do {
610 if (scope->previous == NULL) return true;
611 if (scope->closed) return false;
612 } while ((scope = scope->previous) != NULL);
613
614 assert(false && "unreachable");
615 return true;
616}
617
621static pm_scope_t *
622pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
623 pm_scope_t *scope = parser->current_scope;
624
625 while (depth-- > 0) {
626 assert(scope != NULL);
627 scope = scope->previous;
628 }
629
630 return scope;
631}
632
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** A closed scope declares the parameter and no scope before it did. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,

    /** A closed scope declares it, but a scope visited earlier did as well. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,

    /** No closed scope declaring the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
} pm_scope_forwarding_param_check_result_t;
638
639static pm_scope_forwarding_param_check_result_t
640pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
641 pm_scope_t *scope = parser->current_scope;
642 bool conflict = false;
643
644 while (scope != NULL) {
645 if (scope->parameters & mask) {
646 if (scope->closed) {
647 if (conflict) {
648 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
649 } else {
650 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
651 }
652 }
653
654 conflict = true;
655 }
656
657 if (scope->closed) break;
658 scope = scope->previous;
659 }
660
661 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
662}
663
664static void
665pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
666 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
668 // Pass.
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
672 break;
673 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
674 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
675 break;
676 }
677}
678
679static void
680pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
681 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
683 // Pass.
684 break;
685 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
686 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
687 break;
688 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
689 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
690 break;
691 }
692}
693
694static void
695pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
696 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
697 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
698 // Pass.
699 break;
700 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
701 // This shouldn't happen, because ... is not allowed in the
702 // declaration of blocks. If we get here, we assume we already have
703 // an error for this.
704 break;
705 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
706 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
707 break;
708 }
709}
710
711static void
712pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
713 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
714 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
715 // Pass.
716 break;
717 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
718 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
719 break;
720 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
721 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
722 break;
723 }
724}
725
730pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
731 return parser->current_scope->shareable_constant;
732}
733
738static void
739pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
740 pm_scope_t *scope = parser->current_scope;
741
742 do {
743 scope->shareable_constant = shareable_constant;
744 } while (!scope->closed && (scope = scope->previous) != NULL);
745}
746
747/******************************************************************************/
748/* Local variable-related functions */
749/******************************************************************************/
750
/**
 * The capacity at which a set of locals switches from a flat list to a hash
 * table: capacities below this value are scanned linearly.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
755
756static void
757pm_locals_free(pm_locals_t *locals) {
758 if (locals->capacity > 0) {
759 xfree(locals->locals);
760 }
761}
762
767static uint32_t
768pm_locals_hash(pm_constant_id_t name) {
769 name = ((name >> 16) ^ name) * 0x45d9f3b;
770 name = ((name >> 16) ^ name) * 0x45d9f3b;
771 name = (name >> 16) ^ name;
772 return name;
773}
774
779static void
780pm_locals_resize(pm_locals_t *locals) {
781 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
782 assert(next_capacity > locals->capacity);
783
784 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
785 if (next_locals == NULL) abort();
786
787 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
788 if (locals->size > 0) {
789 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
790 }
791 } else {
792 // If we just switched from a list to a hash, then we need to fill in
793 // the hash values of all of the locals.
794 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
795 uint32_t mask = next_capacity - 1;
796
797 for (uint32_t index = 0; index < locals->capacity; index++) {
798 pm_local_t *local = &locals->locals[index];
799
800 if (local->name != PM_CONSTANT_ID_UNSET) {
801 if (hash_needed) local->hash = pm_locals_hash(local->name);
802
803 uint32_t hash = local->hash;
804 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
805 next_locals[hash & mask] = *local;
806 }
807 }
808 }
809
810 pm_locals_free(locals);
811 locals->locals = next_locals;
812 locals->capacity = next_capacity;
813}
814
830static bool
831pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
832 if (locals->size >= (locals->capacity / 4 * 3)) {
833 pm_locals_resize(locals);
834 }
835
836 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
837 for (uint32_t index = 0; index < locals->capacity; index++) {
838 pm_local_t *local = &locals->locals[index];
839
840 if (local->name == PM_CONSTANT_ID_UNSET) {
841 *local = (pm_local_t) {
842 .name = name,
843 .location = { .start = start, .end = end },
844 .index = locals->size++,
845 .reads = reads,
846 .hash = 0
847 };
848 return true;
849 } else if (local->name == name) {
850 return false;
851 }
852 }
853 } else {
854 uint32_t mask = locals->capacity - 1;
855 uint32_t hash = pm_locals_hash(name);
856 uint32_t initial_hash = hash;
857
858 do {
859 pm_local_t *local = &locals->locals[hash & mask];
860
861 if (local->name == PM_CONSTANT_ID_UNSET) {
862 *local = (pm_local_t) {
863 .name = name,
864 .location = { .start = start, .end = end },
865 .index = locals->size++,
866 .reads = reads,
867 .hash = initial_hash
868 };
869 return true;
870 } else if (local->name == name) {
871 return false;
872 } else {
873 hash++;
874 }
875 } while ((hash & mask) != initial_hash);
876 }
877
878 assert(false && "unreachable");
879 return true;
880}
881
886static uint32_t
887pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
888 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
889 for (uint32_t index = 0; index < locals->size; index++) {
890 pm_local_t *local = &locals->locals[index];
891 if (local->name == name) return index;
892 }
893 } else {
894 uint32_t mask = locals->capacity - 1;
895 uint32_t hash = pm_locals_hash(name);
896 uint32_t initial_hash = hash & mask;
897
898 do {
899 pm_local_t *local = &locals->locals[hash & mask];
900
901 if (local->name == PM_CONSTANT_ID_UNSET) {
902 return UINT32_MAX;
903 } else if (local->name == name) {
904 return hash & mask;
905 } else {
906 hash++;
907 }
908 } while ((hash & mask) != initial_hash);
909 }
910
911 return UINT32_MAX;
912}
913
918static void
919pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
920 uint32_t index = pm_locals_find(locals, name);
921 assert(index != UINT32_MAX);
922
923 pm_local_t *local = &locals->locals[index];
924 assert(local->reads < UINT32_MAX);
925
926 local->reads++;
927}
928
933static void
934pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
935 uint32_t index = pm_locals_find(locals, name);
936 assert(index != UINT32_MAX);
937
938 pm_local_t *local = &locals->locals[index];
939 assert(local->reads > 0);
940
941 local->reads--;
942}
943
947static uint32_t
948pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
949 uint32_t index = pm_locals_find(locals, name);
950 assert(index != UINT32_MAX);
951
952 return locals->locals[index].reads;
953}
954
963static void
964pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
965 pm_constant_id_list_init_capacity(list, locals->size);
966
967 // If we're still below the threshold for switching to a hash, then we only
968 // need to loop over the locals until we hit the size because the locals are
969 // stored in a list.
970 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
971
972 // We will only warn for unused variables if we're not at the top level, or
973 // if we're parsing a file outside of eval or -e.
974 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
975
976 for (uint32_t index = 0; index < capacity; index++) {
977 pm_local_t *local = &locals->locals[index];
978
979 if (local->name != PM_CONSTANT_ID_UNSET) {
980 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
981
982 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
983 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
984
985 if (constant->length >= 1 && *constant->start != '_') {
986 PM_PARSER_WARN_FORMAT(
987 parser,
988 local->location.start,
989 local->location.end,
990 PM_WARN_UNUSED_LOCAL_VARIABLE,
991 (int) constant->length,
992 (const char *) constant->start
993 );
994 }
995 }
996 }
997 }
998}
999
1000/******************************************************************************/
1001/* Node-related functions */
1002/******************************************************************************/
1003
1007static inline pm_constant_id_t
1008pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1009 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1010}
1011
1015static inline pm_constant_id_t
1016pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1017 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1018}
1019
1023static inline pm_constant_id_t
1024pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1025 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1026}
1027
1031static inline pm_constant_id_t
1032pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1033 return pm_parser_constant_id_location(parser, token->start, token->end);
1034}
1035
1040static inline pm_constant_id_t
1041pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1042 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1043}
1044
1050static pm_node_t *
1051pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1052 pm_node_t *void_node = NULL;
1053
1054 while (node != NULL) {
1055 switch (PM_NODE_TYPE(node)) {
1056 case PM_RETURN_NODE:
1057 case PM_BREAK_NODE:
1058 case PM_NEXT_NODE:
1059 case PM_REDO_NODE:
1060 case PM_RETRY_NODE:
1061 case PM_MATCH_REQUIRED_NODE:
1062 return void_node != NULL ? void_node : node;
1063 case PM_MATCH_PREDICATE_NODE:
1064 return NULL;
1065 case PM_BEGIN_NODE: {
1066 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1067
1068 if (cast->ensure_clause != NULL) {
1069 if (cast->rescue_clause != NULL) {
1070 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1071 if (vn != NULL) return vn;
1072 }
1073
1074 if (cast->statements != NULL) {
1075 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1076 if (vn != NULL) return vn;
1077 }
1078
1079 node = UP(cast->ensure_clause);
1080 } else if (cast->rescue_clause != NULL) {
1081 if (cast->statements == NULL) return NULL;
1082
1083 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1084 if (vn == NULL) return NULL;
1085 if (void_node == NULL) void_node = vn;
1086
1087 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1088 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1089 if (vn == NULL) {
1090 void_node = NULL;
1091 break;
1092 }
1093 if (void_node == NULL) {
1094 void_node = vn;
1095 }
1096 }
1097
1098 if (cast->else_clause != NULL) {
1099 node = UP(cast->else_clause);
1100 } else {
1101 return void_node;
1102 }
1103 } else {
1104 node = UP(cast->statements);
1105 }
1106
1107 break;
1108 }
1109 case PM_ENSURE_NODE: {
1110 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1111 node = UP(cast->statements);
1112 break;
1113 }
1114 case PM_PARENTHESES_NODE: {
1116 node = UP(cast->body);
1117 break;
1118 }
1119 case PM_STATEMENTS_NODE: {
1121 node = cast->body.nodes[cast->body.size - 1];
1122 break;
1123 }
1124 case PM_IF_NODE: {
1125 pm_if_node_t *cast = (pm_if_node_t *) node;
1126 if (cast->statements == NULL || cast->subsequent == NULL) {
1127 return NULL;
1128 }
1129 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1130 if (vn == NULL) {
1131 return NULL;
1132 }
1133 if (void_node == NULL) {
1134 void_node = vn;
1135 }
1136 node = cast->subsequent;
1137 break;
1138 }
1139 case PM_UNLESS_NODE: {
1140 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1141 if (cast->statements == NULL || cast->else_clause == NULL) {
1142 return NULL;
1143 }
1144 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1145 if (vn == NULL) {
1146 return NULL;
1147 }
1148 if (void_node == NULL) {
1149 void_node = vn;
1150 }
1151 node = UP(cast->else_clause);
1152 break;
1153 }
1154 case PM_ELSE_NODE: {
1155 pm_else_node_t *cast = (pm_else_node_t *) node;
1156 node = UP(cast->statements);
1157 break;
1158 }
1159 case PM_AND_NODE: {
1160 pm_and_node_t *cast = (pm_and_node_t *) node;
1161 node = cast->left;
1162 break;
1163 }
1164 case PM_OR_NODE: {
1165 pm_or_node_t *cast = (pm_or_node_t *) node;
1166 node = cast->left;
1167 break;
1168 }
1169 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1171
1172 pm_scope_t *scope = parser->current_scope;
1173 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1174
1175 pm_locals_read(&scope->locals, cast->name);
1176 return NULL;
1177 }
1178 default:
1179 return NULL;
1180 }
1181 }
1182
1183 return NULL;
1184}
1185
1186static inline void
1187pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1188 pm_node_t *void_node = pm_check_value_expression(parser, node);
1189 if (void_node != NULL) {
1190 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1191 }
1192}
1193
1197static void
1198pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1199 const char *type = NULL;
1200 int length = 0;
1201
1202 switch (PM_NODE_TYPE(node)) {
1203 case PM_BACK_REFERENCE_READ_NODE:
1204 case PM_CLASS_VARIABLE_READ_NODE:
1205 case PM_GLOBAL_VARIABLE_READ_NODE:
1206 case PM_INSTANCE_VARIABLE_READ_NODE:
1207 case PM_LOCAL_VARIABLE_READ_NODE:
1208 case PM_NUMBERED_REFERENCE_READ_NODE:
1209 type = "a variable";
1210 length = 10;
1211 break;
1212 case PM_CALL_NODE: {
1213 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1214 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1215
1216 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1217 switch (message->length) {
1218 case 1:
1219 switch (message->start[0]) {
1220 case '+':
1221 case '-':
1222 case '*':
1223 case '/':
1224 case '%':
1225 case '|':
1226 case '^':
1227 case '&':
1228 case '>':
1229 case '<':
1230 type = (const char *) message->start;
1231 length = 1;
1232 break;
1233 }
1234 break;
1235 case 2:
1236 switch (message->start[1]) {
1237 case '=':
1238 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1239 type = (const char *) message->start;
1240 length = 2;
1241 }
1242 break;
1243 case '@':
1244 if (message->start[0] == '+' || message->start[0] == '-') {
1245 type = (const char *) message->start;
1246 length = 2;
1247 }
1248 break;
1249 case '*':
1250 if (message->start[0] == '*') {
1251 type = (const char *) message->start;
1252 length = 2;
1253 }
1254 break;
1255 }
1256 break;
1257 case 3:
1258 if (memcmp(message->start, "<=>", 3) == 0) {
1259 type = "<=>";
1260 length = 3;
1261 }
1262 break;
1263 }
1264
1265 break;
1266 }
1267 case PM_CONSTANT_PATH_NODE:
1268 type = "::";
1269 length = 2;
1270 break;
1271 case PM_CONSTANT_READ_NODE:
1272 type = "a constant";
1273 length = 10;
1274 break;
1275 case PM_DEFINED_NODE:
1276 type = "defined?";
1277 length = 8;
1278 break;
1279 case PM_FALSE_NODE:
1280 type = "false";
1281 length = 5;
1282 break;
1283 case PM_FLOAT_NODE:
1284 case PM_IMAGINARY_NODE:
1285 case PM_INTEGER_NODE:
1286 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1287 case PM_INTERPOLATED_STRING_NODE:
1288 case PM_RATIONAL_NODE:
1289 case PM_REGULAR_EXPRESSION_NODE:
1290 case PM_SOURCE_ENCODING_NODE:
1291 case PM_SOURCE_FILE_NODE:
1292 case PM_SOURCE_LINE_NODE:
1293 case PM_STRING_NODE:
1294 case PM_SYMBOL_NODE:
1295 type = "a literal";
1296 length = 9;
1297 break;
1298 case PM_NIL_NODE:
1299 type = "nil";
1300 length = 3;
1301 break;
1302 case PM_RANGE_NODE: {
1303 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1304
1305 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1306 type = "...";
1307 length = 3;
1308 } else {
1309 type = "..";
1310 length = 2;
1311 }
1312
1313 break;
1314 }
1315 case PM_SELF_NODE:
1316 type = "self";
1317 length = 4;
1318 break;
1319 case PM_TRUE_NODE:
1320 type = "true";
1321 length = 4;
1322 break;
1323 default:
1324 break;
1325 }
1326
1327 if (type != NULL) {
1328 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1329 }
1330}
1331
1336static void
1337pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1338 assert(node->body.size > 0);
1339 const size_t size = node->body.size - (last_value ? 1 : 0);
1340 for (size_t index = 0; index < size; index++) {
1341 pm_void_statement_check(parser, node->body.nodes[index]);
1342 }
1343}
1344
/**
 * The context in which pm_conditional_predicate is inspecting a node. It
 * selects the wording used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    // Literal warnings are formatted with the word "condition".
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
    // Literal warnings are formatted with the word "flip-flop".
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
    // pm_parser_warn_conditional_predicate_literal emits nothing for this.
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1355
1359static void
1360pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1361 switch (type) {
1362 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1363 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1364 break;
1365 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1366 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1367 break;
1368 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1369 break;
1370 }
1371}
1372
1377static bool
1378pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1379 switch (PM_NODE_TYPE(node)) {
1380 case PM_ARRAY_NODE: {
1381 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1382
1383 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1384 for (size_t index = 0; index < cast->elements.size; index++) {
1385 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1386 }
1387
1388 return true;
1389 }
1390 case PM_HASH_NODE: {
1391 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1392
1393 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1394 for (size_t index = 0; index < cast->elements.size; index++) {
1395 const pm_node_t *element = cast->elements.nodes[index];
1396 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1397
1398 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1399 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1400 }
1401
1402 return true;
1403 }
1404 case PM_FALSE_NODE:
1405 case PM_FLOAT_NODE:
1406 case PM_IMAGINARY_NODE:
1407 case PM_INTEGER_NODE:
1408 case PM_NIL_NODE:
1409 case PM_RATIONAL_NODE:
1410 case PM_REGULAR_EXPRESSION_NODE:
1411 case PM_SOURCE_ENCODING_NODE:
1412 case PM_SOURCE_FILE_NODE:
1413 case PM_SOURCE_LINE_NODE:
1414 case PM_STRING_NODE:
1415 case PM_SYMBOL_NODE:
1416 case PM_TRUE_NODE:
1417 return true;
1418 default:
1419 return false;
1420 }
1421}
1422
1427static inline void
1428pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1429 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1430 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1431 }
1432}
1433
1446static void
1447pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1448 switch (PM_NODE_TYPE(node)) {
1449 case PM_AND_NODE: {
1450 pm_and_node_t *cast = (pm_and_node_t *) node;
1451 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1452 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1453 break;
1454 }
1455 case PM_OR_NODE: {
1456 pm_or_node_t *cast = (pm_or_node_t *) node;
1457 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1458 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1459 break;
1460 }
1461 case PM_PARENTHESES_NODE: {
1463
1464 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1465 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1466 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1467 }
1468
1469 break;
1470 }
1471 case PM_BEGIN_NODE: {
1472 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1473 if (cast->statements != NULL) {
1474 pm_statements_node_t *statements = cast->statements;
1475 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1476 }
1477 break;
1478 }
1479 case PM_RANGE_NODE: {
1480 pm_range_node_t *cast = (pm_range_node_t *) node;
1481
1482 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1483 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1484
1485 // Here we change the range node into a flip flop node. We can do
1486 // this since the nodes are exactly the same except for the type.
1487 // We're only asserting against the size when we should probably
1488 // assert against the entire layout, but we'll assume tests will
1489 // catch this.
1490 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1491 node->type = PM_FLIP_FLOP_NODE;
1492
1493 break;
1494 }
1495 case PM_REGULAR_EXPRESSION_NODE:
1496 // Here we change the regular expression node into a match last line
1497 // node. We can do this since the nodes are exactly the same except
1498 // for the type.
1500 node->type = PM_MATCH_LAST_LINE_NODE;
1501
1502 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1503 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1504 }
1505
1506 break;
1507 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1508 // Here we change the interpolated regular expression node into an
1509 // interpolated match last line node. We can do this since the nodes
1510 // are exactly the same except for the type.
1512 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1513
1514 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1515 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1516 }
1517
1518 break;
1519 case PM_INTEGER_NODE:
1520 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1521 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1522 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1523 }
1524 } else {
1525 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1526 }
1527 break;
1528 case PM_STRING_NODE:
1529 case PM_SOURCE_FILE_NODE:
1530 case PM_INTERPOLATED_STRING_NODE:
1531 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1532 break;
1533 case PM_SYMBOL_NODE:
1534 case PM_INTERPOLATED_SYMBOL_NODE:
1535 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1536 break;
1537 case PM_SOURCE_LINE_NODE:
1538 case PM_SOURCE_ENCODING_NODE:
1539 case PM_FLOAT_NODE:
1540 case PM_RATIONAL_NODE:
1541 case PM_IMAGINARY_NODE:
1542 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1543 break;
1544 case PM_CLASS_VARIABLE_WRITE_NODE:
1545 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1546 break;
1547 case PM_CONSTANT_WRITE_NODE:
1548 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1549 break;
1550 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1551 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1552 break;
1553 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1554 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1555 break;
1556 case PM_LOCAL_VARIABLE_WRITE_NODE:
1557 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1558 break;
1559 case PM_MULTI_WRITE_NODE:
1560 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1561 break;
1562 default:
1563 break;
1564 }
1565}
1566
1575static inline pm_token_t
1576not_provided(pm_parser_t *parser) {
1577 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1578}
1579
1602
1606static inline const uint8_t *
1607pm_arguments_end(pm_arguments_t *arguments) {
1608 if (arguments->block != NULL) {
1609 const uint8_t *end = arguments->block->location.end;
1610 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1611 end = arguments->closing_loc.end;
1612 }
1613 return end;
1614 }
1615 if (arguments->closing_loc.start != NULL) {
1616 return arguments->closing_loc.end;
1617 }
1618 if (arguments->arguments != NULL) {
1619 return arguments->arguments->base.location.end;
1620 }
1621 return arguments->closing_loc.end;
1622}
1623
1628static void
1629pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1630 // First, check that we have arguments and that we don't have a closing
1631 // location for them.
1632 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1633 return;
1634 }
1635
1636 // Next, check that we don't have a single parentheses argument. This would
1637 // look like:
1638 //
1639 // foo (1) {}
1640 //
1641 // In this case, it's actually okay for the block to be attached to the
1642 // call, even though it looks like it's attached to the argument.
1643 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1644 return;
1645 }
1646
1647 // If we didn't hit a case before this check, then at this point we need to
1648 // add a syntax error.
1649 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1650}
1651
1652/******************************************************************************/
1653/* Basic character checks */
1654/******************************************************************************/
1655
1662static inline size_t
1663char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1664 if (n <= 0) return 0;
1665
1666 if (parser->encoding_changed) {
1667 size_t width;
1668
1669 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1670 return width;
1671 } else if (*b == '_') {
1672 return 1;
1673 } else if (*b >= 0x80) {
1674 return parser->encoding->char_width(b, n);
1675 } else {
1676 return 0;
1677 }
1678 } else if (*b < 0x80) {
1679 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1680 } else {
1681 return pm_encoding_utf_8_char_width(b, n);
1682 }
1683}
1684
1689static inline size_t
1690char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1691 if (n <= 0) {
1692 return 0;
1693 } else if (*b < 0x80) {
1694 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1695 } else {
1696 return pm_encoding_utf_8_char_width(b, n);
1697 }
1698}
1699
1705static inline size_t
1706char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1707 if (n <= 0) {
1708 return 0;
1709 } else if (parser->encoding_changed) {
1710 size_t width;
1711
1712 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1713 return width;
1714 } else if (*b == '_') {
1715 return 1;
1716 } else if (*b >= 0x80) {
1717 return parser->encoding->char_width(b, n);
1718 } else {
1719 return 0;
1720 }
1721 } else {
1722 return char_is_identifier_utf8(b, n);
1723 }
1724}
1725
// Here we're defining a bitmap of the punctuation characters that are allowed
// in global names. This is used to quickly check the next character after a $
// to see if it's a valid character for a global name.
//
// The table holds one 32-bit word per block of 32 ASCII code points, starting
// at 0x20. BIT(c, idx) yields the bit for character c if c belongs to word idx
// and 0 otherwise, so OR-ing BIT over every character builds word idx.
#define BIT(c, idx) (((c) / 32 - 1 == (idx)) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int i = (unsigned int) b;

    // Only 0x21 through 0x7e are covered by the table.
    if (i <= 0x20 || 0x7e < i) return false;

    return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
}
1750
1751static inline bool
1752token_is_setter_name(pm_token_t *token) {
1753 return (
1754 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1755 ((token->type == PM_TOKEN_IDENTIFIER) &&
1756 (token->end - token->start >= 2) &&
1757 (token->end[-1] == '='))
1758 );
1759}
1760
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *text;
        size_t length;
    } keywords[] = {
        // 2 bytes
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        // 3 bytes
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        // 4 bytes
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        // 5 bytes
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        // 6 bytes
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"),
        KEYWORD("return"), KEYWORD("unless"),
        // 8 bytes
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        // 12 bytes
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    for (size_t position = 0; position < sizeof(keywords) / sizeof(keywords[0]); position++) {
        // Compare lengths first so that memcmp never reads past either side.
        if (keywords[position].length != length) continue;
        if (memcmp(source, keywords[position].text, length) == 0) return true;
    }

    return false;
}
1831
1832/******************************************************************************/
1833/* Node flag handling functions */
1834/******************************************************************************/
1835
1839static inline void
1840pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1841 node->flags |= flag;
1842}
1843
1847static inline void
1848pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1849 node->flags &= (pm_node_flags_t) ~flag;
1850}
1851
1855static inline void
1856pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1857 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1858 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1859 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1860 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1861 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1862 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1863 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1864 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1865
1866 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1867}
1868
1869/******************************************************************************/
1870/* Node creation functions */
1871/******************************************************************************/
1872
1878#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1879
1883static inline pm_node_flags_t
1884pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1885 pm_node_flags_t flags = 0;
1886
1887 if (closing->type == PM_TOKEN_REGEXP_END) {
1888 pm_buffer_t unknown_flags = { 0 };
1889
1890 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1891 switch (*flag) {
1892 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1893 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1894 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1895 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1896
1897 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1898 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1899 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1900 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1901
1902 default: pm_buffer_append_byte(&unknown_flags, *flag);
1903 }
1904 }
1905
1906 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1907 if (unknown_flags_length != 0) {
1908 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1909 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1910 }
1911 pm_buffer_free(&unknown_flags);
1912 }
1913
1914 return flags;
1915}
1916
1917#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1918
1919static pm_statements_node_t *
1920pm_statements_node_create(pm_parser_t *parser);
1921
1922static void
1923pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1924
1925static size_t
1926pm_statements_node_body_length(pm_statements_node_t *node);
1927
1932static inline void *
1933pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1934 void *memory = xcalloc(1, size);
1935 if (memory == NULL) {
1936 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1937 abort();
1938 }
1939 return memory;
1940}
1941
// Allocate storage for a node of the given type through pm_node_alloc and
// yield it as a pointer to that type. The expansion is fully parenthesized so
// that it behaves as a single operand wherever it is used.
#define PM_NODE_ALLOC(parser_, type_) ((type_ *) pm_node_alloc(parser_, sizeof(type_)))

// Build a pm_node_t value with the given type, flags, and location. Each use
// increments the parser's node_id counter and stores the new value in the
// node.
#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
    .type = (type_), \
    .flags = (flags_), \
    .node_id = ++(parser_)->node_id, \
    .location = { .start = (start_), .end = (end_) } \
}

// Variants of PM_NODE_INIT that differ only in where the location comes from:
// no location at all, an empty location at the start of the source, the span
// of a single token, or the span of a single node.
#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)
#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)
#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))
#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))

// Variants of PM_NODE_INIT whose location runs from the start of the first
// token/node argument to the end of the second.
#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))
#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))
#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))
#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
1959
1963static pm_missing_node_t *
1964pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1965 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1966
1967 *node = (pm_missing_node_t) {
1968 .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
1969 };
1970
1971 return node;
1972}
1973
1977static pm_alias_global_variable_node_t *
1978pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1979 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1980 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1981
1982 *node = (pm_alias_global_variable_node_t) {
1983 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
1984 .new_name = new_name,
1985 .old_name = old_name,
1986 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1987 };
1988
1989 return node;
1990}
1991
1995static pm_alias_method_node_t *
1996pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1997 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1998 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1999
2000 *node = (pm_alias_method_node_t) {
2001 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
2002 .new_name = new_name,
2003 .old_name = old_name,
2004 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2005 };
2006
2007 return node;
2008}
2009
2013static pm_alternation_pattern_node_t *
2014pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2015 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2016
2017 *node = (pm_alternation_pattern_node_t) {
2018 .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
2019 .left = left,
2020 .right = right,
2021 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2022 };
2023
2024 return node;
2025}
2026
2030static pm_and_node_t *
2031pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2032 pm_assert_value_expression(parser, left);
2033
2034 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2035
2036 *node = (pm_and_node_t) {
2037 .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
2038 .left = left,
2039 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2040 .right = right
2041 };
2042
2043 return node;
2044}
2045
2049static pm_arguments_node_t *
2050pm_arguments_node_create(pm_parser_t *parser) {
2051 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2052
2053 *node = (pm_arguments_node_t) {
2054 .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0),
2055 .arguments = { 0 }
2056 };
2057
2058 return node;
2059}
2060
2064static size_t
2065pm_arguments_node_size(pm_arguments_node_t *node) {
2066 return node->arguments.size;
2067}
2068
2072static void
2073pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2074 if (pm_arguments_node_size(node) == 0) {
2075 node->base.location.start = argument->location.start;
2076 }
2077
2078 if (node->base.location.end < argument->location.end) {
2079 node->base.location.end = argument->location.end;
2080 }
2081
2082 pm_node_list_append(&node->arguments, argument);
2083
2084 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2085 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2086 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2087 } else {
2088 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2089 }
2090 }
2091}
2092
2096static pm_array_node_t *
2097pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2098 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2099
2100 *node = (pm_array_node_t) {
2101 .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
2102 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104 .elements = { 0 }
2105 };
2106
2107 return node;
2108}
2109
2113static inline void
2114pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115 if (!node->elements.size && !node->opening_loc.start) {
2116 node->base.location.start = element->location.start;
2117 }
2118
2119 pm_node_list_append(&node->elements, element);
2120 node->base.location.end = element->location.end;
2121
2122 // If the element is not a static literal, then the array is not a static
2123 // literal. Turn that flag off.
2124 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2126 }
2127
2128 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130 }
2131}
2132
2136static void
2137pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139 node->base.location.end = closing->end;
2140 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141}
2142
2147static pm_array_pattern_node_t *
2148pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150
2151 *node = (pm_array_pattern_node_t) {
2152 .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2153 .constant = NULL,
2154 .rest = NULL,
2155 .requireds = { 0 },
2156 .posts = { 0 },
2157 .opening_loc = { 0 },
2158 .closing_loc = { 0 }
2159 };
2160
2161 // For now we're going to just copy over each pointer manually. This could be
2162 // much more efficient, as we could instead resize the node list.
2163 bool found_rest = false;
2164 pm_node_t *child;
2165
2166 PM_NODE_LIST_FOREACH(nodes, index, child) {
2167 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2168 node->rest = child;
2169 found_rest = true;
2170 } else if (found_rest) {
2171 pm_node_list_append(&node->posts, child);
2172 } else {
2173 pm_node_list_append(&node->requireds, child);
2174 }
2175 }
2176
2177 return node;
2178}
2179
2183static pm_array_pattern_node_t *
2184pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2185 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2186
2187 *node = (pm_array_pattern_node_t) {
2188 .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
2189 .constant = NULL,
2190 .rest = rest,
2191 .requireds = { 0 },
2192 .posts = { 0 },
2193 .opening_loc = { 0 },
2194 .closing_loc = { 0 }
2195 };
2196
2197 return node;
2198}
2199
2204static pm_array_pattern_node_t *
2205pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2206 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2207
2208 *node = (pm_array_pattern_node_t) {
2209 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
2210 .constant = constant,
2211 .rest = NULL,
2212 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2213 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2214 .requireds = { 0 },
2215 .posts = { 0 }
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
2231 .constant = NULL,
2232 .rest = NULL,
2233 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2234 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2235 .requireds = { 0 },
2236 .posts = { 0 }
2237 };
2238
2239 return node;
2240}
2241
2242static inline void
2243pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2244 pm_node_list_append(&node->requireds, inner);
2245}
2246
2250static pm_assoc_node_t *
2251pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2252 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2253 const uint8_t *end;
2254
2255 if (value != NULL && value->location.end > key->location.end) {
2256 end = value->location.end;
2257 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2258 end = operator->end;
2259 } else {
2260 end = key->location.end;
2261 }
2262
2263 // Hash string keys will be frozen, so we can mark them as frozen here so
2264 // that the compiler picks them up and also when we check for static literal
2265 // on the keys it gets factored in.
2266 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2267 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2268 }
2269
2270 // If the key and value of this assoc node are both static literals, then
2271 // we can mark this node as a static literal.
2272 pm_node_flags_t flags = 0;
2273 if (
2274 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2275 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2276 ) {
2277 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2278 }
2279
2280 *node = (pm_assoc_node_t) {
2281 .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
2282 .key = key,
2283 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2284 .value = value
2285 };
2286
2287 return node;
2288}
2289
2293static pm_assoc_splat_node_t *
2294pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2295 assert(operator->type == PM_TOKEN_USTAR_STAR);
2296 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2297
2298 *node = (pm_assoc_splat_node_t) {
2299 .base = (
2300 (value == NULL)
2301 ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
2302 : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
2303 ),
2304 .value = value,
2305 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2306 };
2307
2308 return node;
2309}
2310
2314static pm_back_reference_read_node_t *
2315pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2316 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2317 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2318
2319 *node = (pm_back_reference_read_node_t) {
2320 .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
2321 .name = pm_parser_constant_id_token(parser, name)
2322 };
2323
2324 return node;
2325}
2326
2330static pm_begin_node_t *
2331pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2332 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2333
2334 *node = (pm_begin_node_t) {
2335 .base = (
2336 (statements == NULL)
2337 ? PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
2338 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
2339 ),
2340 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2341 .statements = statements,
2342 .end_keyword_loc = { 0 }
2343 };
2344
2345 return node;
2346}
2347
2351static void
2352pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2353 // If the begin keyword doesn't exist, we set the start on the begin_node
2354 if (!node->begin_keyword_loc.start) {
2355 node->base.location.start = rescue_clause->base.location.start;
2356 }
2357 node->base.location.end = rescue_clause->base.location.end;
2358 node->rescue_clause = rescue_clause;
2359}
2360
2364static void
2365pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2366 node->base.location.end = else_clause->base.location.end;
2367 node->else_clause = else_clause;
2368}
2369
2373static void
2374pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2375 node->base.location.end = ensure_clause->base.location.end;
2376 node->ensure_clause = ensure_clause;
2377}
2378
2382static void
2383pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2384 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2385
2386 node->base.location.end = end_keyword->end;
2387 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2388}
2389
2393static pm_block_argument_node_t *
2394pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2395 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2396
2397 *node = (pm_block_argument_node_t) {
2398 .base = (
2399 (expression == NULL)
2400 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
2401 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
2402 ),
2403 .expression = expression,
2404 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2405 };
2406
2407 return node;
2408}
2409
2413static pm_block_node_t *
2414pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2415 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2416
2417 *node = (pm_block_node_t) {
2418 .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
2419 .locals = *locals,
2420 .parameters = parameters,
2421 .body = body,
2422 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2423 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2424 };
2425
2426 return node;
2427}
2428
2432static pm_block_parameter_node_t *
2433pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2434 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2435 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2436
2437 *node = (pm_block_parameter_node_t) {
2438 .base = (
2439 (name->type == PM_TOKEN_NOT_PROVIDED)
2440 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
2441 : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
2442 ),
2443 .name = pm_parser_optional_constant_id_token(parser, name),
2444 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2445 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2446 };
2447
2448 return node;
2449}
2450
2454static pm_block_parameters_node_t *
2455pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2456 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2457
2458 const uint8_t *start;
2459 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2460 start = opening->start;
2461 } else if (parameters != NULL) {
2462 start = parameters->base.location.start;
2463 } else {
2464 start = NULL;
2465 }
2466
2467 const uint8_t *end;
2468 if (parameters != NULL) {
2469 end = parameters->base.location.end;
2470 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2471 end = opening->end;
2472 } else {
2473 end = NULL;
2474 }
2475
2476 *node = (pm_block_parameters_node_t) {
2477 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
2478 .parameters = parameters,
2479 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2480 .closing_loc = { 0 },
2481 .locals = { 0 }
2482 };
2483
2484 return node;
2485}
2486
2490static void
2491pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2492 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2493
2494 node->base.location.end = closing->end;
2495 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2496}
2497
2501static pm_block_local_variable_node_t *
2502pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2503 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2504
2505 *node = (pm_block_local_variable_node_t) {
2506 .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
2507 .name = pm_parser_constant_id_token(parser, name)
2508 };
2509
2510 return node;
2511}
2512
2516static void
2517pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2518 pm_node_list_append(&node->locals, UP(local));
2519
2520 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2521 node->base.location.end = local->base.location.end;
2522}
2523
2527static pm_break_node_t *
2528pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2529 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2530 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2531
2532 *node = (pm_break_node_t) {
2533 .base = (
2534 (arguments == NULL)
2535 ? PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
2536 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
2537 ),
2538 .arguments = arguments,
2539 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2540 };
2541
2542 return node;
2543}
2544
// There are certain flags that we want to use internally but don't want to
// expose because they are not relevant beyond parsing. Therefore we'll define
// them here and not define them in config.yml/a header file.
//
// NOTE(review): the call node flags below are all derived from
// PM_CALL_NODE_FLAGS_LAST, presumably so that they occupy bits above the
// publicly defined call node flags -- confirm against the definition of
// PM_CALL_NODE_FLAGS_LAST. PM_CALL_NODE_FLAGS_INDEX is the flag that
// pm_call_node_aref_create sets on index calls that carry either no block or a
// block argument.
static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);

static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2553
2559static pm_call_node_t *
2560pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2561 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2562
2563 *node = (pm_call_node_t) {
2564 .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
2565 .receiver = NULL,
2566 .call_operator_loc = { 0 },
2567 .message_loc = { 0 },
2568 .opening_loc = { 0 },
2569 .arguments = NULL,
2570 .closing_loc = { 0 },
2571 .equal_loc = { 0 },
2572 .block = NULL,
2573 .name = 0
2574 };
2575
2576 return node;
2577}
2578
2583static inline pm_node_flags_t
2584pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2585 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2586}
2587
2592static pm_call_node_t *
2593pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2594 pm_assert_value_expression(parser, receiver);
2595
2596 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2597 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2598 flags |= PM_CALL_NODE_FLAGS_INDEX;
2599 }
2600
2601 pm_call_node_t *node = pm_call_node_create(parser, flags);
2602
2603 node->base.location.start = receiver->location.start;
2604 node->base.location.end = pm_arguments_end(arguments);
2605
2606 node->receiver = receiver;
2607 node->message_loc.start = arguments->opening_loc.start;
2608 node->message_loc.end = arguments->closing_loc.end;
2609
2610 node->opening_loc = arguments->opening_loc;
2611 node->arguments = arguments->arguments;
2612 node->closing_loc = arguments->closing_loc;
2613 node->block = arguments->block;
2614
2615 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2616 return node;
2617}
2618
2622static pm_call_node_t *
2623pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2624 pm_assert_value_expression(parser, receiver);
2625 pm_assert_value_expression(parser, argument);
2626
2627 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2628
2629 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2630 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2631
2632 node->receiver = receiver;
2633 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2634
2635 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2636 pm_arguments_node_arguments_append(arguments, argument);
2637 node->arguments = arguments;
2638
2639 node->name = pm_parser_constant_id_token(parser, operator);
2640 return node;
2641}
2642
2643static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2644
2648static pm_call_node_t *
2649pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2650 pm_assert_value_expression(parser, receiver);
2651
2652 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2653
2654 node->base.location.start = receiver->location.start;
2655 const uint8_t *end = pm_arguments_end(arguments);
2656 if (end == NULL) {
2657 end = message->end;
2658 }
2659 node->base.location.end = end;
2660
2661 node->receiver = receiver;
2662 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2663 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2664 node->opening_loc = arguments->opening_loc;
2665 node->arguments = arguments->arguments;
2666 node->closing_loc = arguments->closing_loc;
2667 node->block = arguments->block;
2668
2669 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2670 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2671 }
2672
2677 node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
2678 return node;
2679}
2680
2684static pm_call_node_t *
2685pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2686 pm_call_node_t *node = pm_call_node_create(parser, 0);
2687 node->base.location.start = parser->start;
2688 node->base.location.end = parser->end;
2689
2690 node->receiver = receiver;
2691 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2692 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2693 node->arguments = arguments;
2694
2695 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2696 return node;
2697}
2698
2703static pm_call_node_t *
2704pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2705 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2706
2707 node->base.location.start = message->start;
2708 node->base.location.end = pm_arguments_end(arguments);
2709
2710 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2711 node->opening_loc = arguments->opening_loc;
2712 node->arguments = arguments->arguments;
2713 node->closing_loc = arguments->closing_loc;
2714 node->block = arguments->block;
2715
2716 node->name = pm_parser_constant_id_token(parser, message);
2717 return node;
2718}
2719
2724static pm_call_node_t *
2725pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2726 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2727
2728 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2729 node->arguments = arguments;
2730
2731 node->name = name;
2732 return node;
2733}
2734
2738static pm_call_node_t *
2739pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2740 pm_assert_value_expression(parser, receiver);
2741 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2742
2743 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2744
2745 node->base.location.start = message->start;
2746 if (arguments->closing_loc.start != NULL) {
2747 node->base.location.end = arguments->closing_loc.end;
2748 } else {
2749 assert(receiver != NULL);
2750 node->base.location.end = receiver->location.end;
2751 }
2752
2753 node->receiver = receiver;
2754 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2755 node->opening_loc = arguments->opening_loc;
2756 node->arguments = arguments->arguments;
2757 node->closing_loc = arguments->closing_loc;
2758
2759 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2760 return node;
2761}
2762
2766static pm_call_node_t *
2767pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2768 pm_assert_value_expression(parser, receiver);
2769
2770 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2771
2772 node->base.location.start = receiver->location.start;
2773 node->base.location.end = pm_arguments_end(arguments);
2774
2775 node->receiver = receiver;
2776 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2777 node->opening_loc = arguments->opening_loc;
2778 node->arguments = arguments->arguments;
2779 node->closing_loc = arguments->closing_loc;
2780 node->block = arguments->block;
2781
2782 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2783 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2784 }
2785
2786 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2787 return node;
2788}
2789
2793static pm_call_node_t *
2794pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2795 pm_assert_value_expression(parser, receiver);
2796
2797 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2798
2799 node->base.location.start = operator->start;
2800 node->base.location.end = receiver->location.end;
2801
2802 node->receiver = receiver;
2803 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2804
2805 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2806 return node;
2807}
2808
2813static pm_call_node_t *
2814pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2815 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2816
2817 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2818 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819
2820 node->name = pm_parser_constant_id_token(parser, message);
2821 return node;
2822}
2823
2828static inline bool
2829pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2830 return (
2831 (node->message_loc.start != NULL) &&
2832 (node->message_loc.end[-1] != '!') &&
2833 (node->message_loc.end[-1] != '?') &&
2834 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2835 (node->opening_loc.start == NULL) &&
2836 (node->arguments == NULL) &&
2837 (node->block == NULL)
2838 );
2839}
2840
2844static void
2845pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2846 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2847
2848 if (write_constant->length > 0) {
2849 size_t length = write_constant->length - 1;
2850
2851 void *memory = xmalloc(length);
2852 memcpy(memory, write_constant->start, length);
2853
2854 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2855 } else {
2856 // We can get here if the message was missing because of a syntax error.
2857 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2858 }
2859}
2860
2864static pm_call_and_write_node_t *
2865pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2866 assert(target->block == NULL);
2867 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2868 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2869
2870 *node = (pm_call_and_write_node_t) {
2871 .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
2872 .receiver = target->receiver,
2873 .call_operator_loc = target->call_operator_loc,
2874 .message_loc = target->message_loc,
2875 .read_name = 0,
2876 .write_name = target->name,
2877 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2878 .value = value
2879 };
2880
2881 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2882
2883 // Here we're going to free the target, since it is no longer necessary.
2884 // However, we don't want to call `pm_node_destroy` because we want to keep
2885 // around all of its children since we just reused them.
2886 xfree(target);
2887
2888 return node;
2889}
2890
2895static void
2896pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2897 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2898 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2899 pm_node_t *node;
2900 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2901 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2902 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2903 break;
2904 }
2905 }
2906 }
2907
2908 if (block != NULL) {
2909 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2910 }
2911 }
2912}
2913
2917static pm_index_and_write_node_t *
2918pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2919 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2920 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2921
2922 pm_index_arguments_check(parser, target->arguments, target->block);
2923
2924 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2925 *node = (pm_index_and_write_node_t) {
2926 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
2927 .receiver = target->receiver,
2928 .call_operator_loc = target->call_operator_loc,
2929 .opening_loc = target->opening_loc,
2930 .arguments = target->arguments,
2931 .closing_loc = target->closing_loc,
2932 .block = (pm_block_argument_node_t *) target->block,
2933 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2934 .value = value
2935 };
2936
2937 // Here we're going to free the target, since it is no longer necessary.
2938 // However, we don't want to call `pm_node_destroy` because we want to keep
2939 // around all of its children since we just reused them.
2940 xfree(target);
2941
2942 return node;
2943}
2944
2948static pm_call_operator_write_node_t *
2949pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2950 assert(target->block == NULL);
2951 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
2952
2953 *node = (pm_call_operator_write_node_t) {
2954 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
2955 .receiver = target->receiver,
2956 .call_operator_loc = target->call_operator_loc,
2957 .message_loc = target->message_loc,
2958 .read_name = 0,
2959 .write_name = target->name,
2960 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2961 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2962 .value = value
2963 };
2964
2965 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2966
2967 // Here we're going to free the target, since it is no longer necessary.
2968 // However, we don't want to call `pm_node_destroy` because we want to keep
2969 // around all of its children since we just reused them.
2970 xfree(target);
2971
2972 return node;
2973}
2974
2978static pm_index_operator_write_node_t *
2979pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2980 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
2981
2982 pm_index_arguments_check(parser, target->arguments, target->block);
2983
2984 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2985 *node = (pm_index_operator_write_node_t) {
2986 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
2987 .receiver = target->receiver,
2988 .call_operator_loc = target->call_operator_loc,
2989 .opening_loc = target->opening_loc,
2990 .arguments = target->arguments,
2991 .closing_loc = target->closing_loc,
2992 .block = (pm_block_argument_node_t *) target->block,
2993 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2994 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2995 .value = value
2996 };
2997
2998 // Here we're going to free the target, since it is no longer necessary.
2999 // However, we don't want to call `pm_node_destroy` because we want to keep
3000 // around all of its children since we just reused them.
3001 xfree(target);
3002
3003 return node;
3004}
3005
3009static pm_call_or_write_node_t *
3010pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3012 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3013 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3014
3015 *node = (pm_call_or_write_node_t) {
3016 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .message_loc = target->message_loc,
3020 .read_name = 0,
3021 .write_name = target->name,
3022 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3023 .value = value
3024 };
3025
3026 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3027
3028 // Here we're going to free the target, since it is no longer necessary.
3029 // However, we don't want to call `pm_node_destroy` because we want to keep
3030 // around all of its children since we just reused them.
3031 xfree(target);
3032
3033 return node;
3034}
3035
3039static pm_index_or_write_node_t *
3040pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3042 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3043
3044 pm_index_arguments_check(parser, target->arguments, target->block);
3045
3046 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3047 *node = (pm_index_or_write_node_t) {
3048 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
3049 .receiver = target->receiver,
3050 .call_operator_loc = target->call_operator_loc,
3051 .opening_loc = target->opening_loc,
3052 .arguments = target->arguments,
3053 .closing_loc = target->closing_loc,
3054 .block = (pm_block_argument_node_t *) target->block,
3055 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3056 .value = value
3057 };
3058
3059 // Here we're going to free the target, since it is no longer necessary.
3060 // However, we don't want to call `pm_node_destroy` because we want to keep
3061 // around all of its children since we just reused them.
3062 xfree(target);
3063
3064 return node;
3065}
3066
3071static pm_call_target_node_t *
3072pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3073 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3074
3075 *node = (pm_call_target_node_t) {
3076 .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
3077 .receiver = target->receiver,
3078 .call_operator_loc = target->call_operator_loc,
3079 .name = target->name,
3080 .message_loc = target->message_loc
3081 };
3082
3083 /* It is possible to get here where we have parsed an invalid syntax tree
3084 * where the call operator was not present. In that case we will have a
3085 * problem because it is a required location. In this case we need to fill
3086 * it in with a fake location so that the syntax tree remains valid. */
3087 if (node->call_operator_loc.start == NULL) {
3088 node->call_operator_loc = (pm_location_t) {
3089 .start = target->base.location.start,
3090 .end = target->base.location.start
3091 };
3092 }
3093
3094 // Here we're going to free the target, since it is no longer necessary.
3095 // However, we don't want to call `pm_node_destroy` because we want to keep
3096 // around all of its children since we just reused them.
3097 xfree(target);
3098
3099 return node;
3100}
3101
3106static pm_index_target_node_t *
3107pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3108 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3109
3110 pm_index_arguments_check(parser, target->arguments, target->block);
3111 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3112
3113 *node = (pm_index_target_node_t) {
3114 .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
3115 .receiver = target->receiver,
3116 .opening_loc = target->opening_loc,
3117 .arguments = target->arguments,
3118 .closing_loc = target->closing_loc,
3119 .block = (pm_block_argument_node_t *) target->block,
3120 };
3121
3122 // Here we're going to free the target, since it is no longer necessary.
3123 // However, we don't want to call `pm_node_destroy` because we want to keep
3124 // around all of its children since we just reused them.
3125 xfree(target);
3126
3127 return node;
3128}
3129
3133static pm_capture_pattern_node_t *
3134pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3135 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3136
3137 *node = (pm_capture_pattern_node_t) {
3138 .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
3139 .value = value,
3140 .target = target,
3141 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3142 };
3143
3144 return node;
3145}
3146
3150static pm_case_node_t *
3151pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3152 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3153
3154 *node = (pm_case_node_t) {
3155 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
3156 .predicate = predicate,
3157 .else_clause = NULL,
3158 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3159 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3160 .conditions = { 0 }
3161 };
3162
3163 return node;
3164}
3165
3169static void
3170pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3171 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3172
3173 pm_node_list_append(&node->conditions, condition);
3174 node->base.location.end = condition->location.end;
3175}
3176
3180static void
3181pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3182 node->else_clause = else_clause;
3183 node->base.location.end = else_clause->base.location.end;
3184}
3185
3189static void
3190pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3191 node->base.location.end = end_keyword->end;
3192 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3193}
3194
3198static pm_case_match_node_t *
3199pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3200 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3201
3202 *node = (pm_case_match_node_t) {
3203 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
3204 .predicate = predicate,
3205 .else_clause = NULL,
3206 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3207 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3208 .conditions = { 0 }
3209 };
3210
3211 return node;
3212}
3213
3217static void
3218pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3219 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3220
3221 pm_node_list_append(&node->conditions, condition);
3222 node->base.location.end = condition->location.end;
3223}
3224
3228static void
3229pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3230 node->else_clause = else_clause;
3231 node->base.location.end = else_clause->base.location.end;
3232}
3233
3237static void
3238pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3239 node->base.location.end = end_keyword->end;
3240 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3241}
3242
3246static pm_class_node_t *
3247pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3248 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3249
3250 *node = (pm_class_node_t) {
3251 .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
3252 .locals = *locals,
3253 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3254 .constant_path = constant_path,
3255 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3256 .superclass = superclass,
3257 .body = body,
3258 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3259 .name = pm_parser_constant_id_token(parser, name)
3260 };
3261
3262 return node;
3263}
3264
3268static pm_class_variable_and_write_node_t *
3269pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3270 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3271 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3272
3273 *node = (pm_class_variable_and_write_node_t) {
3274 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
3275 .name = target->name,
3276 .name_loc = target->base.location,
3277 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3278 .value = value
3279 };
3280
3281 return node;
3282}
3283
3287static pm_class_variable_operator_write_node_t *
3288pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3289 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3290
3291 *node = (pm_class_variable_operator_write_node_t) {
3292 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
3293 .name = target->name,
3294 .name_loc = target->base.location,
3295 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3296 .value = value,
3297 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3298 };
3299
3300 return node;
3301}
3302
3306static pm_class_variable_or_write_node_t *
3307pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3309 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3310
3311 *node = (pm_class_variable_or_write_node_t) {
3312 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
3313 .name = target->name,
3314 .name_loc = target->base.location,
3315 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3316 .value = value
3317 };
3318
3319 return node;
3320}
3321
3325static pm_class_variable_read_node_t *
3326pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3327 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3328 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3329
3330 *node = (pm_class_variable_read_node_t) {
3331 .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
3332 .name = pm_parser_constant_id_token(parser, token)
3333 };
3334
3335 return node;
3336}
3337
3344static inline pm_node_flags_t
3345pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3346 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3347 return flags;
3348 }
3349 return 0;
3350}
3351
3355static pm_class_variable_write_node_t *
3356pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3357 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3358 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3359
3360 *node = (pm_class_variable_write_node_t) {
3361 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
3362 .name = read_node->name,
3363 .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
3364 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3365 .value = value
3366 };
3367
3368 return node;
3369}
3370
3374static pm_constant_path_and_write_node_t *
3375pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3376 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3377 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3378
3379 *node = (pm_constant_path_and_write_node_t) {
3380 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
3381 .target = target,
3382 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3383 .value = value
3384 };
3385
3386 return node;
3387}
3388
3392static pm_constant_path_operator_write_node_t *
3393pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3394 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3395
3396 *node = (pm_constant_path_operator_write_node_t) {
3397 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
3398 .target = target,
3399 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3400 .value = value,
3401 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3402 };
3403
3404 return node;
3405}
3406
3410static pm_constant_path_or_write_node_t *
3411pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3412 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3413 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3414
3415 *node = (pm_constant_path_or_write_node_t) {
3416 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
3417 .target = target,
3418 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3419 .value = value
3420 };
3421
3422 return node;
3423}
3424
3428static pm_constant_path_node_t *
3429pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3430 pm_assert_value_expression(parser, parent);
3431 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3432
3433 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3434 if (name_token->type == PM_TOKEN_CONSTANT) {
3435 name = pm_parser_constant_id_token(parser, name_token);
3436 }
3437
3438 if (parent == NULL) {
3439 *node = (pm_constant_path_node_t) {
3440 .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token),
3441 .parent = parent,
3442 .name = name,
3443 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3444 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3445 };
3446 } else {
3447 *node = (pm_constant_path_node_t) {
3448 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token),
3449 .parent = parent,
3450 .name = name,
3451 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3452 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3453 };
3454 }
3455
3456 return node;
3457}
3458
3462static pm_constant_path_write_node_t *
3463pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3464 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3465 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3466
3467 *node = (pm_constant_path_write_node_t) {
3468 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value),
3469 .target = target,
3470 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3471 .value = value
3472 };
3473
3474 return node;
3475}
3476
3480static pm_constant_and_write_node_t *
3481pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3482 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3483 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3484
3485 *node = (pm_constant_and_write_node_t) {
3486 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
3487 .name = target->name,
3488 .name_loc = target->base.location,
3489 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3490 .value = value
3491 };
3492
3493 return node;
3494}
3495
3499static pm_constant_operator_write_node_t *
3500pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3501 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3502
3503 *node = (pm_constant_operator_write_node_t) {
3504 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
3505 .name = target->name,
3506 .name_loc = target->base.location,
3507 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3508 .value = value,
3509 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3510 };
3511
3512 return node;
3513}
3514
3518static pm_constant_or_write_node_t *
3519pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3520 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3521 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3522
3523 *node = (pm_constant_or_write_node_t) {
3524 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
3525 .name = target->name,
3526 .name_loc = target->base.location,
3527 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3528 .value = value
3529 };
3530
3531 return node;
3532}
3533
3537static pm_constant_read_node_t *
3538pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3539 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3540 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3541
3542 *node = (pm_constant_read_node_t) {
3543 .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
3544 .name = pm_parser_constant_id_token(parser, name)
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_write_node_t *
3554pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3556 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3557
3558 *node = (pm_constant_write_node_t) {
3559 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value),
3560 .name = target->name,
3561 .name_loc = target->base.location,
3562 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3563 .value = value
3564 };
3565
3566 return node;
3567}
3568
3572static void
3573pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3574 switch (PM_NODE_TYPE(node)) {
3575 case PM_BEGIN_NODE: {
3576 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3577 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3578 break;
3579 }
3580 case PM_PARENTHESES_NODE: {
3581 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3582 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3583 break;
3584 }
3585 case PM_STATEMENTS_NODE: {
3586 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3587 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3588 break;
3589 }
3590 case PM_ARRAY_NODE:
3591 case PM_FLOAT_NODE:
3592 case PM_IMAGINARY_NODE:
3593 case PM_INTEGER_NODE:
3594 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3595 case PM_INTERPOLATED_STRING_NODE:
3596 case PM_INTERPOLATED_SYMBOL_NODE:
3597 case PM_INTERPOLATED_X_STRING_NODE:
3598 case PM_RATIONAL_NODE:
3599 case PM_REGULAR_EXPRESSION_NODE:
3600 case PM_SOURCE_ENCODING_NODE:
3601 case PM_SOURCE_FILE_NODE:
3602 case PM_SOURCE_LINE_NODE:
3603 case PM_STRING_NODE:
3604 case PM_SYMBOL_NODE:
3605 case PM_X_STRING_NODE:
3606 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3607 break;
3608 default:
3609 break;
3610 }
3611}
3612
3616static pm_def_node_t *
3617pm_def_node_create(
3618 pm_parser_t *parser,
3619 pm_constant_id_t name,
3620 const pm_token_t *name_loc,
3621 pm_node_t *receiver,
3622 pm_parameters_node_t *parameters,
3623 pm_node_t *body,
3624 pm_constant_id_list_t *locals,
3625 const pm_token_t *def_keyword,
3626 const pm_token_t *operator,
3627 const pm_token_t *lparen,
3628 const pm_token_t *rparen,
3629 const pm_token_t *equal,
3630 const pm_token_t *end_keyword
3631) {
3632 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3633
3634 if (receiver != NULL) {
3635 pm_def_node_receiver_check(parser, receiver);
3636 }
3637
3638 *node = (pm_def_node_t) {
3639 .base = (
3640 (end_keyword->type == PM_TOKEN_NOT_PROVIDED)
3641 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
3642 : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
3643 ),
3644 .name = name,
3645 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3646 .receiver = receiver,
3647 .parameters = parameters,
3648 .body = body,
3649 .locals = *locals,
3650 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3651 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3652 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3653 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3654 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3655 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3656 };
3657
3658 return node;
3659}
3660
3664static pm_defined_node_t *
3665pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3666 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3667
3668 *node = (pm_defined_node_t) {
3669 .base = (
3670 (rparen->type == PM_TOKEN_NOT_PROVIDED)
3671 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
3672 : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
3673 ),
3674 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3675 .value = value,
3676 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3677 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
3678 };
3679
3680 return node;
3681}
3682
3686static pm_else_node_t *
3687pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3688 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3689
3690 *node = (pm_else_node_t) {
3691 .base = (
3692 ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL))
3693 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
3694 : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
3695 ),
3696 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3697 .statements = statements,
3698 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3699 };
3700
3701 return node;
3702}
3703
3707static pm_embedded_statements_node_t *
3708pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3709 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3710
3711 *node = (pm_embedded_statements_node_t) {
3712 .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
3713 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3714 .statements = statements,
3715 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3716 };
3717
3718 return node;
3719}
3720
3724static pm_embedded_variable_node_t *
3725pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3726 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3727
3728 *node = (pm_embedded_variable_node_t) {
3729 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
3730 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3731 .variable = variable
3732 };
3733
3734 return node;
3735}
3736
3740static pm_ensure_node_t *
3741pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3742 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
3743
3744 *node = (pm_ensure_node_t) {
3745 .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
3746 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
3747 .statements = statements,
3748 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
3749 };
3750
3751 return node;
3752}
3753
3757static pm_false_node_t *
3758pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3759 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3760 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
3761
3762 *node = (pm_false_node_t) {
3763 .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
3764 };
3765
3766 return node;
3767}
3768
3773static pm_find_pattern_node_t *
3774pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3775 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
3776
3777 pm_node_t *left = nodes->nodes[0];
3778 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3779 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3780
3781 pm_node_t *right;
3782
3783 if (nodes->size == 1) {
3784 right = UP(pm_missing_node_create(parser, left->location.end, left->location.end));
3785 } else {
3786 right = nodes->nodes[nodes->size - 1];
3787 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3788 }
3789
3790#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3791 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3792 // The resulting AST will anyway be ignored, but this file still needs to compile.
3793 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3794#else
3795 pm_node_t *right_splat_node = right;
3796#endif
3797 *node = (pm_find_pattern_node_t) {
3798 .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right),
3799 .constant = NULL,
3800 .left = left_splat_node,
3801 .right = right_splat_node,
3802 .requireds = { 0 },
3803 .opening_loc = { 0 },
3804 .closing_loc = { 0 }
3805 };
3806
3807 // For now we're going to just copy over each pointer manually. This could be
3808 // much more efficient, as we could instead resize the node list to only point
3809 // to 1...-1.
3810 for (size_t index = 1; index < nodes->size - 1; index++) {
3811 pm_node_list_append(&node->requireds, nodes->nodes[index]);
3812 }
3813
3814 return node;
3815}
3816
3821static double
3822pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3823 ptrdiff_t diff = token->end - token->start;
3824 if (diff <= 0) return 0.0;
3825
3826 // First, get a buffer of the content.
3827 size_t length = (size_t) diff;
3828 char *buffer = xmalloc(sizeof(char) * (length + 1));
3829 memcpy((void *) buffer, token->start, length);
3830
3831 // Next, determine if we need to replace the decimal point because of
3832 // locale-specific options, and then normalize them if we have to.
3833 char decimal_point = *localeconv()->decimal_point;
3834 if (decimal_point != '.') {
3835 for (size_t index = 0; index < length; index++) {
3836 if (buffer[index] == '.') buffer[index] = decimal_point;
3837 }
3838 }
3839
3840 // Next, handle underscores by removing them from the buffer.
3841 for (size_t index = 0; index < length; index++) {
3842 if (buffer[index] == '_') {
3843 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3844 length--;
3845 }
3846 }
3847
3848 // Null-terminate the buffer so that strtod cannot read off the end.
3849 buffer[length] = '\0';
3850
3851 // Now, call strtod to parse the value. Note that CRuby has their own
3852 // version of strtod which avoids locales. We're okay using the locale-aware
3853 // version because we've already validated through the parser that the token
3854 // is in a valid format.
3855 errno = 0;
3856 char *eptr;
3857 double value = strtod(buffer, &eptr);
3858
3859 // This should never happen, because we've already checked that the token
3860 // is in a valid format. However it's good to be safe.
3861 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3862 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
3863 xfree((void *) buffer);
3864 return 0.0;
3865 }
3866
3867 // If errno is set, then it should only be ERANGE. At this point we need to
3868 // check if it's infinity (it should be).
3869 if (errno == ERANGE && PRISM_ISINF(value)) {
3870 int warn_width;
3871 const char *ellipsis;
3872
3873 if (length > 20) {
3874 warn_width = 20;
3875 ellipsis = "...";
3876 } else {
3877 warn_width = (int) length;
3878 ellipsis = "";
3879 }
3880
3881 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3882 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3883 }
3884
3885 // Finally we can free the buffer and return the value.
3886 xfree((void *) buffer);
3887 return value;
3888}
3889
3893static pm_float_node_t *
3894pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3895 assert(token->type == PM_TOKEN_FLOAT);
3896 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
3897
3898 *node = (pm_float_node_t) {
3899 .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3900 .value = pm_double_parse(parser, token)
3901 };
3902
3903 return node;
3904}
3905
3909static pm_imaginary_node_t *
3910pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3911 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3912
3913 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3914 *node = (pm_imaginary_node_t) {
3915 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3916 .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
3917 .type = PM_TOKEN_FLOAT,
3918 .start = token->start,
3919 .end = token->end - 1
3920 })))
3921 };
3922
3923 return node;
3924}
3925
3929static pm_rational_node_t *
3930pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3931 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3932
3933 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
3934 *node = (pm_rational_node_t) {
3935 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
3936 .numerator = { 0 },
3937 .denominator = { 0 }
3938 };
3939
3940 const uint8_t *start = token->start;
3941 const uint8_t *end = token->end - 1; // r
3942
3943 while (start < end && *start == '0') start++; // 0.1 -> .1
3944 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3945
3946 size_t length = (size_t) (end - start);
3947 if (length == 1) {
3948 node->denominator.value = 1;
3949 return node;
3950 }
3951
3952 const uint8_t *point = memchr(start, '.', length);
3953 assert(point && "should have a decimal point");
3954
3955 uint8_t *digits = xmalloc(length);
3956 if (digits == NULL) {
3957 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3958 abort();
3959 }
3960
3961 memcpy(digits, start, (unsigned long) (point - start));
3962 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3963 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3964
3965 digits[0] = '1';
3966 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
3967 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
3968 xfree(digits);
3969
3970 pm_integers_reduce(&node->numerator, &node->denominator);
3971 return node;
3972}
3973
3978static pm_imaginary_node_t *
3979pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3980 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
3981
3982 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3983 *node = (pm_imaginary_node_t) {
3984 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3985 .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
3986 .type = PM_TOKEN_FLOAT_RATIONAL,
3987 .start = token->start,
3988 .end = token->end - 1
3989 })))
3990 };
3991
3992 return node;
3993}
3994
3998static pm_for_node_t *
3999pm_for_node_create(
4000 pm_parser_t *parser,
4001 pm_node_t *index,
4002 pm_node_t *collection,
4003 pm_statements_node_t *statements,
4004 const pm_token_t *for_keyword,
4005 const pm_token_t *in_keyword,
4006 const pm_token_t *do_keyword,
4007 const pm_token_t *end_keyword
4008) {
4009 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4010
4011 *node = (pm_for_node_t) {
4012 .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword),
4013 .index = index,
4014 .collection = collection,
4015 .statements = statements,
4016 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4017 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4018 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4019 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4020 };
4021
4022 return node;
4023}
4024
4028static pm_forwarding_arguments_node_t *
4029pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4030 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4031 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4032
4033 *node = (pm_forwarding_arguments_node_t) {
4034 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
4035 };
4036
4037 return node;
4038}
4039
4043static pm_forwarding_parameter_node_t *
4044pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4045 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4046 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4047
4048 *node = (pm_forwarding_parameter_node_t) {
4049 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
4050 };
4051
4052 return node;
4053}
4054
4058static pm_forwarding_super_node_t *
4059pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4060 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4061 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4062 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4063
4064 pm_block_node_t *block = NULL;
4065 if (arguments->block != NULL) {
4066 block = (pm_block_node_t *) arguments->block;
4067 }
4068
4069 *node = (pm_forwarding_super_node_t) {
4070 .base = (
4071 (block == NULL)
4072 ? PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token)
4073 : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block)
4074 ),
4075 .block = block
4076 };
4077
4078 return node;
4079}
4080
4085static pm_hash_pattern_node_t *
4086pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4087 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4088
4089 *node = (pm_hash_pattern_node_t) {
4090 .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
4091 .constant = NULL,
4092 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4093 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4094 .elements = { 0 },
4095 .rest = NULL
4096 };
4097
4098 return node;
4099}
4100
4104static pm_hash_pattern_node_t *
4105pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4106 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4107
4108 const uint8_t *start;
4109 const uint8_t *end;
4110
4111 if (elements->size > 0) {
4112 if (rest) {
4113 start = MIN(rest->location.start, elements->nodes[0]->location.start);
4114 end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end);
4115 } else {
4116 start = elements->nodes[0]->location.start;
4117 end = elements->nodes[elements->size - 1]->location.end;
4118 }
4119 } else {
4120 assert(rest != NULL);
4121 start = rest->location.start;
4122 end = rest->location.end;
4123 }
4124
4125 *node = (pm_hash_pattern_node_t) {
4126 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
4127 .constant = NULL,
4128 .elements = { 0 },
4129 .rest = rest,
4130 .opening_loc = { 0 },
4131 .closing_loc = { 0 }
4132 };
4133
4134 pm_node_list_concat(&node->elements, elements);
4135 return node;
4136}
4137
4141static pm_constant_id_t
4142pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4143 switch (PM_NODE_TYPE(target)) {
4144 case PM_GLOBAL_VARIABLE_READ_NODE:
4145 return ((pm_global_variable_read_node_t *) target)->name;
4146 case PM_BACK_REFERENCE_READ_NODE:
4147 return ((pm_back_reference_read_node_t *) target)->name;
4148 case PM_NUMBERED_REFERENCE_READ_NODE:
4149 // This will only ever happen in the event of a syntax error, but we
4150 // still need to provide something for the node.
4151 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4152 default:
4153 assert(false && "unreachable");
4154 return (pm_constant_id_t) -1;
4155 }
4156}
4157
4161static pm_global_variable_and_write_node_t *
4162pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4163 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4164 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4165
4166 *node = (pm_global_variable_and_write_node_t) {
4167 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
4168 .name = pm_global_variable_write_name(parser, target),
4169 .name_loc = target->location,
4170 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4171 .value = value
4172 };
4173
4174 return node;
4175}
4176
4180static pm_global_variable_operator_write_node_t *
4181pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4182 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4183
4184 *node = (pm_global_variable_operator_write_node_t) {
4185 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4186 .name = pm_global_variable_write_name(parser, target),
4187 .name_loc = target->location,
4188 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4189 .value = value,
4190 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4191 };
4192
4193 return node;
4194}
4195
4199static pm_global_variable_or_write_node_t *
4200pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4201 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4202 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4203
4204 *node = (pm_global_variable_or_write_node_t) {
4205 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
4206 .name = pm_global_variable_write_name(parser, target),
4207 .name_loc = target->location,
4208 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4209 .value = value
4210 };
4211
4212 return node;
4213}
4214
4218static pm_global_variable_read_node_t *
4219pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4220 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4221
4222 *node = (pm_global_variable_read_node_t) {
4223 .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
4224 .name = pm_parser_constant_id_token(parser, name)
4225 };
4226
4227 return node;
4228}
4229
4233static pm_global_variable_read_node_t *
4234pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4235 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4236
4237 *node = (pm_global_variable_read_node_t) {
4238 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
4239 .name = name
4240 };
4241
4242 return node;
4243}
4244
4248static pm_global_variable_write_node_t *
4249pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4250 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4251 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4252
4253 *node = (pm_global_variable_write_node_t) {
4254 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
4255 .name = pm_global_variable_write_name(parser, target),
4256 .name_loc = PM_LOCATION_NODE_VALUE(target),
4257 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4258 .value = value
4259 };
4260
4261 return node;
4262}
4263
4267static pm_global_variable_write_node_t *
4268pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4269 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4270
4271 *node = (pm_global_variable_write_node_t) {
4272 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
4273 .name = name,
4274 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4275 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4276 .value = value
4277 };
4278
4279 return node;
4280}
4281
4285static pm_hash_node_t *
4286pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4287 assert(opening != NULL);
4288 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4289
4290 *node = (pm_hash_node_t) {
4291 .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4292 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4293 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4294 .elements = { 0 }
4295 };
4296
4297 return node;
4298}
4299
4303static inline void
4304pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4305 pm_node_list_append(&hash->elements, element);
4306
4307 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4308 if (static_literal) {
4309 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4310 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4311 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4312 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4313 }
4314
4315 if (!static_literal) {
4316 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4317 }
4318}
4319
4320static inline void
4321pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4322 hash->base.location.end = token->end;
4323 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4324}
4325
4329static pm_if_node_t *
4330pm_if_node_create(pm_parser_t *parser,
4331 const pm_token_t *if_keyword,
4332 pm_node_t *predicate,
4333 const pm_token_t *then_keyword,
4334 pm_statements_node_t *statements,
4335 pm_node_t *subsequent,
4336 const pm_token_t *end_keyword
4337) {
4338 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4339 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4340
4341 const uint8_t *end;
4342 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4343 end = end_keyword->end;
4344 } else if (subsequent != NULL) {
4345 end = subsequent->location.end;
4346 } else if (pm_statements_node_body_length(statements) != 0) {
4347 end = statements->base.location.end;
4348 } else {
4349 end = predicate->location.end;
4350 }
4351
4352 *node = (pm_if_node_t) {
4353 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
4354 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4355 .predicate = predicate,
4356 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4357 .statements = statements,
4358 .subsequent = subsequent,
4359 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4360 };
4361
4362 return node;
4363}
4364
4368static pm_if_node_t *
4369pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4370 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4371 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4372
4373 pm_statements_node_t *statements = pm_statements_node_create(parser);
4374 pm_statements_node_body_append(parser, statements, statement, true);
4375
4376 *node = (pm_if_node_t) {
4377 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
4378 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4379 .predicate = predicate,
4380 .then_keyword_loc = { 0 },
4381 .statements = statements,
4382 .subsequent = NULL,
4383 .end_keyword_loc = { 0 }
4384 };
4385
4386 return node;
4387}
4388
4392static pm_if_node_t *
4393pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4394 pm_assert_value_expression(parser, predicate);
4395 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4396
4397 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4398 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4399
4400 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4401 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4402
4403 pm_token_t end_keyword = not_provided(parser);
4404 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4405
4406 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4407
4408 *node = (pm_if_node_t) {
4409 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
4410 .if_keyword_loc = { 0 },
4411 .predicate = predicate,
4412 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4413 .statements = if_statements,
4414 .subsequent = UP(else_node),
4415 .end_keyword_loc = { 0 }
4416 };
4417
4418 return node;
4419
4420}
4421
4422static inline void
4423pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4424 node->base.location.end = keyword->end;
4425 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4426}
4427
4428static inline void
4429pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4430 node->base.location.end = keyword->end;
4431 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4432}
4433
4437static pm_implicit_node_t *
4438pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4439 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4440
4441 *node = (pm_implicit_node_t) {
4442 .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
4443 .value = value
4444 };
4445
4446 return node;
4447}
4448
4452static pm_implicit_rest_node_t *
4453pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4454 assert(token->type == PM_TOKEN_COMMA);
4455
4456 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4457
4458 *node = (pm_implicit_rest_node_t) {
4459 .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
4460 };
4461
4462 return node;
4463}
4464
4468static pm_integer_node_t *
4469pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4470 assert(token->type == PM_TOKEN_INTEGER);
4471 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4472
4473 *node = (pm_integer_node_t) {
4474 .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4475 .value = { 0 }
4476 };
4477
4478 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4479 switch (base) {
4480 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4481 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4482 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4483 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4484 default: assert(false && "unreachable"); break;
4485 }
4486
4487 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4488 return node;
4489}
4490
4495static pm_imaginary_node_t *
4496pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4497 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4498
4499 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4500 *node = (pm_imaginary_node_t) {
4501 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4502 .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4503 .type = PM_TOKEN_INTEGER,
4504 .start = token->start,
4505 .end = token->end - 1
4506 })))
4507 };
4508
4509 return node;
4510}
4511
4516static pm_rational_node_t *
4517pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4518 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4519
4520 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4521 *node = (pm_rational_node_t) {
4522 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4523 .numerator = { 0 },
4524 .denominator = { .value = 1, 0 }
4525 };
4526
4527 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4528 switch (base) {
4529 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4530 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4531 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4532 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4533 default: assert(false && "unreachable"); break;
4534 }
4535
4536 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4537
4538 return node;
4539}
4540
4545static pm_imaginary_node_t *
4546pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4547 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4548
4549 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4550 *node = (pm_imaginary_node_t) {
4551 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4552 .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4553 .type = PM_TOKEN_INTEGER_RATIONAL,
4554 .start = token->start,
4555 .end = token->end - 1
4556 })))
4557 };
4558
4559 return node;
4560}
4561
4565static pm_in_node_t *
4566pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4567 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4568
4569 const uint8_t *end;
4570 if (statements != NULL) {
4571 end = statements->base.location.end;
4572 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4573 end = then_keyword->end;
4574 } else {
4575 end = pattern->location.end;
4576 }
4577
4578 *node = (pm_in_node_t) {
4579 .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
4580 .pattern = pattern,
4581 .statements = statements,
4582 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4583 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
4584 };
4585
4586 return node;
4587}
4588
4592static pm_instance_variable_and_write_node_t *
4593pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4594 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4595 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
4596
4597 *node = (pm_instance_variable_and_write_node_t) {
4598 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
4599 .name = target->name,
4600 .name_loc = target->base.location,
4601 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4602 .value = value
4603 };
4604
4605 return node;
4606}
4607
4611static pm_instance_variable_operator_write_node_t *
4612pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4613 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
4614
4615 *node = (pm_instance_variable_operator_write_node_t) {
4616 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4617 .name = target->name,
4618 .name_loc = target->base.location,
4619 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4620 .value = value,
4621 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4622 };
4623
4624 return node;
4625}
4626
4630static pm_instance_variable_or_write_node_t *
4631pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4632 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4633 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
4634
4635 *node = (pm_instance_variable_or_write_node_t) {
4636 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
4637 .name = target->name,
4638 .name_loc = target->base.location,
4639 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4640 .value = value
4641 };
4642
4643 return node;
4644}
4645
4649static pm_instance_variable_read_node_t *
4650pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4651 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4652 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
4653
4654 *node = (pm_instance_variable_read_node_t) {
4655 .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
4656 .name = pm_parser_constant_id_token(parser, token)
4657 };
4658
4659 return node;
4660}
4661
4666static pm_instance_variable_write_node_t *
4667pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4668 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
4669 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4670
4671 *node = (pm_instance_variable_write_node_t) {
4672 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
4673 .name = read_node->name,
4674 .name_loc = PM_LOCATION_NODE_VALUE(read_node),
4675 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4676 .value = value
4677 };
4678
4679 return node;
4680}
4681
4687static void
4688pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4689 switch (PM_NODE_TYPE(part)) {
4690 case PM_STRING_NODE:
4691 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4692 break;
4693 case PM_EMBEDDED_STATEMENTS_NODE: {
4694 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4695 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4696
4697 if (embedded == NULL) {
4698 // If there are no statements or more than one statement, then
4699 // we lose the static literal flag.
4700 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4701 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4702 // If the embedded statement is a string, then we can keep the
4703 // static literal flag and mark the string as frozen.
4704 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4705 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4706 // If the embedded statement is an interpolated string and it's
4707 // a static literal, then we can keep the static literal flag.
4708 } else {
4709 // Otherwise we lose the static literal flag.
4710 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4711 }
4712
4713 break;
4714 }
4715 case PM_EMBEDDED_VARIABLE_NODE:
4716 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4717 break;
4718 default:
4719 assert(false && "unexpected node type");
4720 break;
4721 }
4722
4723 pm_node_list_append(parts, part);
4724}
4725
4729static pm_interpolated_regular_expression_node_t *
4730pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4731 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
4732
4733 *node = (pm_interpolated_regular_expression_node_t) {
4734 .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4735 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4736 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
4737 .parts = { 0 }
4738 };
4739
4740 return node;
4741}
4742
4743static inline void
4744pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4745 if (node->base.location.start > part->location.start) {
4746 node->base.location.start = part->location.start;
4747 }
4748 if (node->base.location.end < part->location.end) {
4749 node->base.location.end = part->location.end;
4750 }
4751
4752 pm_interpolated_node_append(UP(node), &node->parts, part);
4753}
4754
4755static inline void
4756pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4757 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4758 node->base.location.end = closing->end;
4759 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4760}
4761
4785static inline void
4786pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
4787#define CLEAR_FLAGS(node) \
4788 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4789
4790#define MUTABLE_FLAGS(node) \
4791 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4792
4793 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4794 node->base.location.start = part->location.start;
4795 }
4796
4797 node->base.location.end = MAX(node->base.location.end, part->location.end);
4798
4799 switch (PM_NODE_TYPE(part)) {
4800 case PM_STRING_NODE:
4801 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4802 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4803 // as long as this interpolation only consists of other string literals.
4804 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4805 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4806 }
4807 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4808 break;
4809 case PM_INTERPOLATED_STRING_NODE:
4810 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4811 // If the string that we're concatenating is a static literal,
4812 // then we can keep the static literal flag for this string.
4813 } else {
4814 // Otherwise, we lose the static literal flag here and we should
4815 // also clear the mutability flags.
4816 CLEAR_FLAGS(node);
4817 }
4818 break;
4819 case PM_EMBEDDED_STATEMENTS_NODE: {
4820 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4821 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4822
4823 if (embedded == NULL) {
4824 // If we're embedding multiple statements or no statements, then
4825 // the string is not longer a static literal.
4826 CLEAR_FLAGS(node);
4827 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4828 // If the embedded statement is a string, then we can make that
4829 // string as frozen and static literal, and not touch the static
4830 // literal status of this string.
4831 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4832
4833 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4834 MUTABLE_FLAGS(node);
4835 }
4836 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4837 // If the embedded statement is an interpolated string, but that
4838 // string is marked as static literal, then we can keep our
4839 // static literal status for this string.
4840 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4841 MUTABLE_FLAGS(node);
4842 }
4843 } else {
4844 // In all other cases, we lose the static literal flag here and
4845 // become mutable.
4846 CLEAR_FLAGS(node);
4847 }
4848
4849 break;
4850 }
4851 case PM_EMBEDDED_VARIABLE_NODE:
4852 // Embedded variables clear static literal, which means we also
4853 // should clear the mutability flags.
4854 CLEAR_FLAGS(node);
4855 break;
4856 case PM_X_STRING_NODE:
4857 case PM_INTERPOLATED_X_STRING_NODE:
4858 case PM_SYMBOL_NODE:
4859 case PM_INTERPOLATED_SYMBOL_NODE:
4860 // These will only happen in error cases. But we want to handle it
4861 // here so that we don't fail the assertion.
4862 CLEAR_FLAGS(node);
4863 break;
4864 default:
4865 assert(false && "unexpected node type");
4866 break;
4867 }
4868
4869 pm_node_list_append(&node->parts, part);
4870
4871#undef CLEAR_FLAGS
4872#undef MUTABLE_FLAGS
4873}
4874
4878static pm_interpolated_string_node_t *
4879pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4880 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
4881 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4882
4883 switch (parser->frozen_string_literal) {
4884 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4885 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4886 break;
4887 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4888 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4889 break;
4890 }
4891
4892 *node = (pm_interpolated_string_node_t) {
4893 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
4894 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4895 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4896 .parts = { 0 }
4897 };
4898
4899 if (parts != NULL) {
4900 pm_node_t *part;
4901 PM_NODE_LIST_FOREACH(parts, index, part) {
4902 pm_interpolated_string_node_append(node, part);
4903 }
4904 }
4905
4906 return node;
4907}
4908
4912static void
4913pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4914 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4915 node->base.location.end = closing->end;
4916}
4917
4918static void
4919pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4920 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4921 node->base.location.start = part->location.start;
4922 }
4923
4924 pm_interpolated_node_append(UP(node), &node->parts, part);
4925 node->base.location.end = MAX(node->base.location.end, part->location.end);
4926}
4927
4928static void
4929pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4930 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4931 node->base.location.end = closing->end;
4932}
4933
4937static pm_interpolated_symbol_node_t *
4938pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4939 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
4940
4941 *node = (pm_interpolated_symbol_node_t) {
4942 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
4943 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4944 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4945 .parts = { 0 }
4946 };
4947
4948 if (parts != NULL) {
4949 pm_node_t *part;
4950 PM_NODE_LIST_FOREACH(parts, index, part) {
4951 pm_interpolated_symbol_node_append(node, part);
4952 }
4953 }
4954
4955 return node;
4956}
4957
4961static pm_interpolated_x_string_node_t *
4962pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4963 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
4964
4965 *node = (pm_interpolated_x_string_node_t) {
4966 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
4967 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4968 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4969 .parts = { 0 }
4970 };
4971
4972 return node;
4973}
4974
4975static inline void
4976pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
4977 pm_interpolated_node_append(UP(node), &node->parts, part);
4978 node->base.location.end = part->location.end;
4979}
4980
4981static inline void
4982pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
4983 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4984 node->base.location.end = closing->end;
4985}
4986
4990static pm_it_local_variable_read_node_t *
4991pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4992 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
4993
4994 *node = (pm_it_local_variable_read_node_t) {
4995 .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name),
4996 };
4997
4998 return node;
4999}
5000
5004static pm_it_parameters_node_t *
5005pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5006 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5007
5008 *node = (pm_it_parameters_node_t) {
5009 .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing),
5010 };
5011
5012 return node;
5013}
5014
5018static pm_keyword_hash_node_t *
5019pm_keyword_hash_node_create(pm_parser_t *parser) {
5020 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5021
5022 *node = (pm_keyword_hash_node_t) {
5023 .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
5024 .elements = { 0 }
5025 };
5026
5027 return node;
5028}
5029
5033static void
5034pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5035 // If the element being added is not an AssocNode or does not have a symbol
5036 // key, then we want to turn the SYMBOL_KEYS flag off.
5037 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5038 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5039 }
5040
5041 pm_node_list_append(&hash->elements, element);
5042 if (hash->base.location.start == NULL) {
5043 hash->base.location.start = element->location.start;
5044 }
5045 hash->base.location.end = element->location.end;
5046}
5047
5051static pm_required_keyword_parameter_node_t *
5052pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5053 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5054
5055 *node = (pm_required_keyword_parameter_node_t) {
5056 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
5057 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5058 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5059 };
5060
5061 return node;
5062}
5063
5067static pm_optional_keyword_parameter_node_t *
5068pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5069 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5070
5071 *node = (pm_optional_keyword_parameter_node_t) {
5072 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
5073 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5074 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5075 .value = value
5076 };
5077
5078 return node;
5079}
5080
5084static pm_keyword_rest_parameter_node_t *
5085pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5086 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5087
5088 *node = (pm_keyword_rest_parameter_node_t) {
5089 .base = (
5090 (name->type == PM_TOKEN_NOT_PROVIDED)
5091 ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
5092 : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
5093 ),
5094 .name = pm_parser_optional_constant_id_token(parser, name),
5095 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5096 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5097 };
5098
5099 return node;
5100}
5101
5105static pm_lambda_node_t *
5106pm_lambda_node_create(
5107 pm_parser_t *parser,
5108 pm_constant_id_list_t *locals,
5109 const pm_token_t *operator,
5110 const pm_token_t *opening,
5111 const pm_token_t *closing,
5112 pm_node_t *parameters,
5113 pm_node_t *body
5114) {
5115 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5116
5117 *node = (pm_lambda_node_t) {
5118 .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
5119 .locals = *locals,
5120 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5121 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5122 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5123 .parameters = parameters,
5124 .body = body
5125 };
5126
5127 return node;
5128}
5129
5133static pm_local_variable_and_write_node_t *
5134pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5135 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5136 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5137 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5138
5139 *node = (pm_local_variable_and_write_node_t) {
5140 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
5141 .name_loc = target->location,
5142 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5143 .value = value,
5144 .name = name,
5145 .depth = depth
5146 };
5147
5148 return node;
5149}
5150
5154static pm_local_variable_operator_write_node_t *
5155pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5156 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5157
5158 *node = (pm_local_variable_operator_write_node_t) {
5159 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
5160 .name_loc = target->location,
5161 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5162 .value = value,
5163 .name = name,
5164 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5165 .depth = depth
5166 };
5167
5168 return node;
5169}
5170
5174static pm_local_variable_or_write_node_t *
5175pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5176 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5177 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5178 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5179
5180 *node = (pm_local_variable_or_write_node_t) {
5181 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
5182 .name_loc = target->location,
5183 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5184 .value = value,
5185 .name = name,
5186 .depth = depth
5187 };
5188
5189 return node;
5190}
5191
5195static pm_local_variable_read_node_t *
5196pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5197 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5198
5199 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5200
5201 *node = (pm_local_variable_read_node_t) {
5202 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
5203 .name = name_id,
5204 .depth = depth
5205 };
5206
5207 return node;
5208}
5209
5213static pm_local_variable_read_node_t *
5214pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5215 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5216 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5217}
5218
5223static pm_local_variable_read_node_t *
5224pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5225 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5226 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5227}
5228
5232static pm_local_variable_write_node_t *
5233pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5234 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5235 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5236
5237 *node = (pm_local_variable_write_node_t) {
5238 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
5239 .name = name,
5240 .depth = depth,
5241 .value = value,
5242 .name_loc = *name_loc,
5243 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5244 };
5245
5246 return node;
5247}
5248
/**
 * Returns true if the bytes in the range [start, end) are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if ((end - start) != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5256
/**
 * Returns true if the bytes in the range [start, end) have the form of a
 * numbered parameter: exactly two bytes, an underscore followed by a decimal
 * digit other than `0`.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if ((end - start) != 2) {
        return false;
    }

    if (start[0] != '_') {
        return false;
    }

    return (start[1] != '0') && pm_char_is_decimal_digit(start[1]);
}
5265
5270static inline void
5271pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5272 if (pm_token_is_numbered_parameter(start, end)) {
5273 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5274 }
5275}
5276
5281static pm_local_variable_target_node_t *
5282pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5283 pm_refute_numbered_parameter(parser, location->start, location->end);
5284 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5285
5286 *node = (pm_local_variable_target_node_t) {
5287 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
5288 .name = name,
5289 .depth = depth
5290 };
5291
5292 return node;
5293}
5294
5298static pm_match_predicate_node_t *
5299pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5300 pm_assert_value_expression(parser, value);
5301
5302 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5303
5304 *node = (pm_match_predicate_node_t) {
5305 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
5306 .value = value,
5307 .pattern = pattern,
5308 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5309 };
5310
5311 return node;
5312}
5313
5317static pm_match_required_node_t *
5318pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5319 pm_assert_value_expression(parser, value);
5320
5321 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5322
5323 *node = (pm_match_required_node_t) {
5324 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
5325 .value = value,
5326 .pattern = pattern,
5327 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5328 };
5329
5330 return node;
5331}
5332
5336static pm_match_write_node_t *
5337pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5338 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5339
5340 *node = (pm_match_write_node_t) {
5341 .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
5342 .call = call,
5343 .targets = { 0 }
5344 };
5345
5346 return node;
5347}
5348
5352static pm_module_node_t *
5353pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5354 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5355
5356 *node = (pm_module_node_t) {
5357 .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
5358 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5359 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5360 .constant_path = constant_path,
5361 .body = body,
5362 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5363 .name = pm_parser_constant_id_token(parser, name)
5364 };
5365
5366 return node;
5367}
5368
5372static pm_multi_target_node_t *
5373pm_multi_target_node_create(pm_parser_t *parser) {
5374 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5375
5376 *node = (pm_multi_target_node_t) {
5377 .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
5378 .lefts = { 0 },
5379 .rest = NULL,
5380 .rights = { 0 },
5381 .lparen_loc = { 0 },
5382 .rparen_loc = { 0 }
5383 };
5384
5385 return node;
5386}
5387
5391static void
5392pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5393 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5394 if (node->rest == NULL) {
5395 node->rest = target;
5396 } else {
5397 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5398 pm_node_list_append(&node->rights, target);
5399 }
5400 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5401 if (node->rest == NULL) {
5402 node->rest = target;
5403 } else {
5404 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5405 pm_node_list_append(&node->rights, target);
5406 }
5407 } else if (node->rest == NULL) {
5408 pm_node_list_append(&node->lefts, target);
5409 } else {
5410 pm_node_list_append(&node->rights, target);
5411 }
5412
5413 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5414 node->base.location.start = target->location.start;
5415 }
5416
5417 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
5418 node->base.location.end = target->location.end;
5419 }
5420}
5421
5425static void
5426pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
5427 node->base.location.start = lparen->start;
5428 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
5429}
5430
5434static void
5435pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
5436 node->base.location.end = rparen->end;
5437 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
5438}
5439
5443static pm_multi_write_node_t *
5444pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5445 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
5446 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5447
5448 *node = (pm_multi_write_node_t) {
5449 .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
5450 .lefts = target->lefts,
5451 .rest = target->rest,
5452 .rights = target->rights,
5453 .lparen_loc = target->lparen_loc,
5454 .rparen_loc = target->rparen_loc,
5455 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5456 .value = value
5457 };
5458
5459 // Explicitly do not call pm_node_destroy here because we want to keep
5460 // around all of the information within the MultiWriteNode node.
5461 xfree(target);
5462
5463 return node;
5464}
5465
5469static pm_next_node_t *
5470pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5471 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5472 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
5473
5474 *node = (pm_next_node_t) {
5475 .base = (
5476 (arguments == NULL)
5477 ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
5478 : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
5479 ),
5480 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5481 .arguments = arguments
5482 };
5483
5484 return node;
5485}
5486
5490static pm_nil_node_t *
5491pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5492 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5493 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
5494
5495 *node = (pm_nil_node_t) {
5496 .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
5497 };
5498
5499 return node;
5500}
5501
5505static pm_no_keywords_parameter_node_t *
5506pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5507 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5508 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5509 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
5510
5511 *node = (pm_no_keywords_parameter_node_t) {
5512 .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
5513 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5514 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
5515 };
5516
5517 return node;
5518}
5519
5523static pm_numbered_parameters_node_t *
5524pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
5525 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
5526
5527 *node = (pm_numbered_parameters_node_t) {
5528 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
5529 .maximum = maximum
5530 };
5531
5532 return node;
5533}
5534
5539#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5540
5547static uint32_t
5548pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5549 const uint8_t *start = token->start + 1;
5550 const uint8_t *end = token->end;
5551
5552 ptrdiff_t diff = end - start;
5553 assert(diff > 0);
5554#if PTRDIFF_MAX > SIZE_MAX
5555 assert(diff < (ptrdiff_t) SIZE_MAX);
5556#endif
5557 size_t length = (size_t) diff;
5558
5559 char *digits = xcalloc(length + 1, sizeof(char));
5560 memcpy(digits, start, length);
5561 digits[length] = '\0';
5562
5563 char *endptr;
5564 errno = 0;
5565 unsigned long value = strtoul(digits, &endptr, 10);
5566
5567 if ((digits == endptr) || (*endptr != '\0')) {
5568 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
5569 value = 0;
5570 }
5571
5572 xfree(digits);
5573
5574 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5575 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5576 value = 0;
5577 }
5578
5579 return (uint32_t) value;
5580}
5581
5582#undef NTH_REF_MAX
5583
5587static pm_numbered_reference_read_node_t *
5588pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5589 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5590 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
5591
5592 *node = (pm_numbered_reference_read_node_t) {
5593 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
5594 .number = pm_numbered_reference_read_node_number(parser, name)
5595 };
5596
5597 return node;
5598}
5599
5603static pm_optional_parameter_node_t *
5604pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5605 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
5606
5607 *node = (pm_optional_parameter_node_t) {
5608 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
5609 .name = pm_parser_constant_id_token(parser, name),
5610 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5611 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5612 .value = value
5613 };
5614
5615 return node;
5616}
5617
5621static pm_or_node_t *
5622pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5623 pm_assert_value_expression(parser, left);
5624
5625 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
5626
5627 *node = (pm_or_node_t) {
5628 .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
5629 .left = left,
5630 .right = right,
5631 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5632 };
5633
5634 return node;
5635}
5636
5640static pm_parameters_node_t *
5641pm_parameters_node_create(pm_parser_t *parser) {
5642 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
5643
5644 *node = (pm_parameters_node_t) {
5645 .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
5646 .rest = NULL,
5647 .keyword_rest = NULL,
5648 .block = NULL,
5649 .requireds = { 0 },
5650 .optionals = { 0 },
5651 .posts = { 0 },
5652 .keywords = { 0 }
5653 };
5654
5655 return node;
5656}
5657
5661static void
5662pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5663 if (params->base.location.start == NULL) {
5664 params->base.location.start = param->location.start;
5665 } else {
5666 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
5667 }
5668
5669 if (params->base.location.end == NULL) {
5670 params->base.location.end = param->location.end;
5671 } else {
5672 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
5673 }
5674}
5675
5679static void
5680pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
5681 pm_parameters_node_location_set(params, param);
5682 pm_node_list_append(&params->requireds, param);
5683}
5684
5688static void
5689pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5690 pm_parameters_node_location_set(params, UP(param));
5691 pm_node_list_append(&params->optionals, UP(param));
5692}
5693
5697static void
5698pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
5699 pm_parameters_node_location_set(params, param);
5700 pm_node_list_append(&params->posts, param);
5701}
5702
5706static void
5707pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5708 pm_parameters_node_location_set(params, param);
5709 params->rest = param;
5710}
5711
5715static void
5716pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
5717 pm_parameters_node_location_set(params, param);
5718 pm_node_list_append(&params->keywords, param);
5719}
5720
5724static void
5725pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5726 assert(params->keyword_rest == NULL);
5727 pm_parameters_node_location_set(params, param);
5728 params->keyword_rest = param;
5729}
5730
5734static void
5735pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
5736 assert(params->block == NULL);
5737 pm_parameters_node_location_set(params, UP(param));
5738 params->block = param;
5739}
5740
5744static pm_program_node_t *
5745pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5746 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
5747
5748 *node = (pm_program_node_t) {
5749 .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
5750 .locals = *locals,
5751 .statements = statements
5752 };
5753
5754 return node;
5755}
5756
5760static pm_parentheses_node_t *
5761pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5762 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
5763
5764 *node = (pm_parentheses_node_t) {
5765 .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
5766 .body = body,
5767 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5768 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5769 };
5770
5771 return node;
5772}
5773
5777static pm_pinned_expression_node_t *
5778pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5779 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
5780
5781 *node = (pm_pinned_expression_node_t) {
5782 .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
5783 .expression = expression,
5784 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5785 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
5786 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
5787 };
5788
5789 return node;
5790}
5791
5795static pm_pinned_variable_node_t *
5796pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5797 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
5798
5799 *node = (pm_pinned_variable_node_t) {
5800 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
5801 .variable = variable,
5802 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5803 };
5804
5805 return node;
5806}
5807
5811static pm_post_execution_node_t *
5812pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5813 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
5814
5815 *node = (pm_post_execution_node_t) {
5816 .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
5817 .statements = statements,
5818 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5819 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5820 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5821 };
5822
5823 return node;
5824}
5825
5829static pm_pre_execution_node_t *
5830pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5831 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
5832
5833 *node = (pm_pre_execution_node_t) {
5834 .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
5835 .statements = statements,
5836 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5837 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5838 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5839 };
5840
5841 return node;
5842}
5843
5847static pm_range_node_t *
5848pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5849 pm_assert_value_expression(parser, left);
5850 pm_assert_value_expression(parser, right);
5851
5852 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
5853 pm_node_flags_t flags = 0;
5854
5855 // Indicate that this node is an exclusive range if the operator is `...`.
5856 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5857 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5858 }
5859
5860 // Indicate that this node is a static literal (i.e., can be compiled with
5861 // a putobject in CRuby) if the left and right are implicit nil, explicit
5862 // nil, or integers.
5863 if (
5864 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5865 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5866 ) {
5867 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5868 }
5869
5870 *node = (pm_range_node_t) {
5871 .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)),
5872 .left = left,
5873 .right = right,
5874 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5875 };
5876
5877 return node;
5878}
5879
5883static pm_redo_node_t *
5884pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5885 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5886 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
5887
5888 *node = (pm_redo_node_t) {
5889 .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
5890 };
5891
5892 return node;
5893}
5894
5899static pm_regular_expression_node_t *
5900pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5901 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
5902 pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
5903
5904 *node = (pm_regular_expression_node_t) {
5905 .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
5906 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5907 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5908 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5909 .unescaped = *unescaped
5910 };
5911
5912 return node;
5913}
5914
5918static inline pm_regular_expression_node_t *
5919pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5920 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5921}
5922
5926static pm_required_parameter_node_t *
5927pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5928 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
5929
5930 *node = (pm_required_parameter_node_t) {
5931 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
5932 .name = pm_parser_constant_id_token(parser, token)
5933 };
5934
5935 return node;
5936}
5937
5941static pm_rescue_modifier_node_t *
5942pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5943 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
5944
5945 *node = (pm_rescue_modifier_node_t) {
5946 .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
5947 .expression = expression,
5948 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5949 .rescue_expression = rescue_expression
5950 };
5951
5952 return node;
5953}
5954
5958static pm_rescue_node_t *
5959pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5960 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
5961
5962 *node = (pm_rescue_node_t) {
5963 .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
5964 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5965 .operator_loc = { 0 },
5966 .then_keyword_loc = { 0 },
5967 .reference = NULL,
5968 .statements = NULL,
5969 .subsequent = NULL,
5970 .exceptions = { 0 }
5971 };
5972
5973 return node;
5974}
5975
5976static inline void
5977pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
5978 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
5979}
5980
5984static void
5985pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5986 node->reference = reference;
5987 node->base.location.end = reference->location.end;
5988}
5989
5993static void
5994pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
5995 node->statements = statements;
5996 if (pm_statements_node_body_length(statements) > 0) {
5997 node->base.location.end = statements->base.location.end;
5998 }
5999}
6000
6004static void
6005pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6006 node->subsequent = subsequent;
6007 node->base.location.end = subsequent->base.location.end;
6008}
6009
6013static void
6014pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6015 pm_node_list_append(&node->exceptions, exception);
6016 node->base.location.end = exception->location.end;
6017}
6018
6022static pm_rest_parameter_node_t *
6023pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6024 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6025
6026 *node = (pm_rest_parameter_node_t) {
6027 .base = (
6028 (name->type == PM_TOKEN_NOT_PROVIDED)
6029 ? PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
6030 : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
6031 ),
6032 .name = pm_parser_optional_constant_id_token(parser, name),
6033 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6034 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6035 };
6036
6037 return node;
6038}
6039
6043static pm_retry_node_t *
6044pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6045 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6046 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6047
6048 *node = (pm_retry_node_t) {
6049 .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
6050 };
6051
6052 return node;
6053}
6054
6058static pm_return_node_t *
6059pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6060 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6061
6062 *node = (pm_return_node_t) {
6063 .base = (
6064 (arguments == NULL)
6065 ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
6066 : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
6067 ),
6068 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6069 .arguments = arguments
6070 };
6071
6072 return node;
6073}
6074
6078static pm_self_node_t *
6079pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6080 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6081 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6082
6083 *node = (pm_self_node_t) {
6084 .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
6085 };
6086
6087 return node;
6088}
6089
6093static pm_shareable_constant_node_t *
6094pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6095 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6096
6097 *node = (pm_shareable_constant_node_t) {
6098 .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
6099 .write = write
6100 };
6101
6102 return node;
6103}
6104
6108static pm_singleton_class_node_t *
6109pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6110 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6111
6112 *node = (pm_singleton_class_node_t) {
6113 .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
6114 .locals = *locals,
6115 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6116 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6117 .expression = expression,
6118 .body = body,
6119 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6120 };
6121
6122 return node;
6123}
6124
6128static pm_source_encoding_node_t *
6129pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6130 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6131 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6132
6133 *node = (pm_source_encoding_node_t) {
6134 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6135 };
6136
6137 return node;
6138}
6139
6143static pm_source_file_node_t*
6144pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6145 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6146 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6147
6148 pm_node_flags_t flags = 0;
6149
6150 switch (parser->frozen_string_literal) {
6151 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6152 flags |= PM_STRING_FLAGS_MUTABLE;
6153 break;
6154 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6155 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6156 break;
6157 }
6158
6159 *node = (pm_source_file_node_t) {
6160 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
6161 .filepath = parser->filepath
6162 };
6163
6164 return node;
6165}
6166
6170static pm_source_line_node_t *
6171pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6172 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6173 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6174
6175 *node = (pm_source_line_node_t) {
6176 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6177 };
6178
6179 return node;
6180}
6181
6185static pm_splat_node_t *
6186pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6187 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6188
6189 *node = (pm_splat_node_t) {
6190 .base = (
6191 (expression == NULL)
6192 ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
6193 : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
6194 ),
6195 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6196 .expression = expression
6197 };
6198
6199 return node;
6200}
6201
6205static pm_statements_node_t *
6206pm_statements_node_create(pm_parser_t *parser) {
6207 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6208
6209 *node = (pm_statements_node_t) {
6210 .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
6211 .body = { 0 }
6212 };
6213
6214 return node;
6215}
6216
6220static size_t
6221pm_statements_node_body_length(pm_statements_node_t *node) {
6222 return node && node->body.size;
6223}
6224
6228static void
6229pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6230 node->base.location = (pm_location_t) { .start = start, .end = end };
6231}
6232
6237static inline void
6238pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6239 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6240 node->base.location.start = statement->location.start;
6241 }
6242
6243 if (statement->location.end > node->base.location.end) {
6244 node->base.location.end = statement->location.end;
6245 }
6246}
6247
6251static void
6252pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6253 pm_statements_node_body_update(node, statement);
6254
6255 if (node->body.size > 0) {
6256 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6257
6258 switch (PM_NODE_TYPE(previous)) {
6259 case PM_BREAK_NODE:
6260 case PM_NEXT_NODE:
6261 case PM_REDO_NODE:
6262 case PM_RETRY_NODE:
6263 case PM_RETURN_NODE:
6264 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6265 break;
6266 default:
6267 break;
6268 }
6269 }
6270
6271 pm_node_list_append(&node->body, statement);
6272 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6273}
6274
6278static void
6279pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
6280 pm_statements_node_body_update(node, statement);
6281 pm_node_list_prepend(&node->body, statement);
6282 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6283}
6284
6288static inline pm_string_node_t *
6289pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6290 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
6291 pm_node_flags_t flags = 0;
6292
6293 switch (parser->frozen_string_literal) {
6294 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6295 flags = PM_STRING_FLAGS_MUTABLE;
6296 break;
6297 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6298 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6299 break;
6300 }
6301
6302 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start);
6303 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end);
6304
6305 *node = (pm_string_node_t) {
6306 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
6307 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6308 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6309 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6310 .unescaped = *string
6311 };
6312
6313 return node;
6314}
6315
6319static pm_string_node_t *
6320pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6321 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6322}
6323
6328static pm_string_node_t *
6329pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6330 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6331 parser->current_string = PM_STRING_EMPTY;
6332 return node;
6333}
6334
6338static pm_super_node_t *
6339pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6340 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6341 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
6342
6343 const uint8_t *end = pm_arguments_end(arguments);
6344 if (end == NULL) {
6345 assert(false && "unreachable");
6346 }
6347
6348 *node = (pm_super_node_t) {
6349 .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
6350 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6351 .lparen_loc = arguments->opening_loc,
6352 .arguments = arguments->arguments,
6353 .rparen_loc = arguments->closing_loc,
6354 .block = arguments->block
6355 };
6356
6357 return node;
6358}
6359
6364static bool
6365pm_ascii_only_p(const pm_string_t *contents) {
6366 const size_t length = pm_string_length(contents);
6367 const uint8_t *source = pm_string_source(contents);
6368
6369 for (size_t index = 0; index < length; index++) {
6370 if (source[index] & 0x80) return false;
6371 }
6372
6373 return true;
6374}
6375
6379static void
6380parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6381 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6382 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6383
6384 if (width == 0) {
6385 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6386 break;
6387 }
6388
6389 cursor += width;
6390 }
6391}
6392
6397static void
6398parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6399 const pm_encoding_t *encoding = parser->encoding;
6400
6401 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6402 size_t width = encoding->char_width(cursor, end - cursor);
6403
6404 if (width == 0) {
6405 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6406 break;
6407 }
6408
6409 cursor += width;
6410 }
6411}
6412
6422static inline pm_node_flags_t
6423parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6424 if (parser->explicit_encoding != NULL) {
6425 // A Symbol may optionally have its encoding explicitly set. This will
6426 // happen if an escape sequence results in a non-ASCII code point.
6427 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6428 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6429 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6430 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6431 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6432 } else if (validate) {
6433 parse_symbol_encoding_validate_other(parser, location, contents);
6434 }
6435 } else if (pm_ascii_only_p(contents)) {
6436 // Ruby stipulates that all source files must use an ASCII-compatible
6437 // encoding. Thus, all symbols appearing in source are eligible for
6438 // "downgrading" to US-ASCII.
6439 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6440 } else if (validate) {
6441 parse_symbol_encoding_validate_other(parser, location, contents);
6442 }
6443
6444 return 0;
6445}
6446
6447static pm_node_flags_t
6448parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6449 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6450 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6451 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6452 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6453
6454 // There's special validation logic used if a string does not contain any character escape sequences.
6455 if (parser->explicit_encoding == NULL) {
6456 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6457 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6458 // the US-ASCII encoding.
6459 if (ascii_only) {
6460 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6461 }
6462
6463 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6464 if (!ascii_only) {
6465 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6466 }
6467 } else if (parser->encoding != modifier_encoding) {
6468 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6469
6470 if (modifier == 'n' && !ascii_only) {
6471 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6472 }
6473 }
6474
6475 return flags;
6476 }
6477
6478 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
6479 bool mixed_encoding = false;
6480
6481 if (mixed_encoding) {
6482 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6483 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6484 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
6485 bool valid_string_in_modifier_encoding = true;
6486
6487 if (!valid_string_in_modifier_encoding) {
6488 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6489 }
6490 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6491 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6492 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6493 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6494 }
6495 }
6496
6497 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
6498 return flags;
6499}
6500
6507static pm_node_flags_t
6508parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6509 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
6510 bool valid_unicode_range = true;
6511 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6512 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6513 return flags;
6514 }
6515
6516 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6517 // to multi-byte characters are allowed.
6518 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6519 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6520 // following error message appearing twice. We do the same for compatibility.
6521 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6522 }
6523
6532 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6533 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6534 }
6535
6536 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6537 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6538 }
6539
6540 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6541 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6542 }
6543
6544 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6545 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6546 }
6547
6548 // At this point no encoding modifiers will be present on the regular expression as they would have already
6549 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6550 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6551 if (ascii_only) {
6552 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6553 }
6554
6555 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6556 // or by specifying a modifier.
6557 //
6558 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6559 if (parser->explicit_encoding != NULL) {
6560 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6561 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6562 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6563 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
6564 }
6565 }
6566
6567 return 0;
6568}
6569
6574static pm_symbol_node_t *
6575pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6576 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6577
6578 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start);
6579 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end);
6580
6581 *node = (pm_symbol_node_t) {
6582 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
6583 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6584 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
6585 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6586 .unescaped = *unescaped
6587 };
6588
6589 return node;
6590}
6591
6595static inline pm_symbol_node_t *
6596pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6597 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6598}
6599
6603static pm_symbol_node_t *
6604pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6605 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6606 parser->current_string = PM_STRING_EMPTY;
6607 return node;
6608}
6609
6613static pm_symbol_node_t *
6614pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6615 pm_symbol_node_t *node;
6616
6617 switch (token->type) {
6618 case PM_TOKEN_LABEL: {
6619 pm_token_t opening = not_provided(parser);
6620 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6621
6622 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6623 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6624
6625 assert((label.end - label.start) >= 0);
6626 pm_string_shared_init(&node->unescaped, label.start, label.end);
6627 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6628
6629 break;
6630 }
6631 case PM_TOKEN_MISSING: {
6632 pm_token_t opening = not_provided(parser);
6633 pm_token_t closing = not_provided(parser);
6634
6635 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
6636 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6637 break;
6638 }
6639 default:
6640 assert(false && "unreachable");
6641 node = NULL;
6642 break;
6643 }
6644
6645 return node;
6646}
6647
6651static pm_symbol_node_t *
6652pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6653 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6654
6655 *node = (pm_symbol_node_t) {
6656 .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
6657 .value_loc = PM_LOCATION_NULL_VALUE(parser),
6658 .unescaped = { 0 }
6659 };
6660
6661 pm_string_constant_init(&node->unescaped, content, strlen(content));
6662 return node;
6663}
6664
6668static bool
6669pm_symbol_node_label_p(pm_node_t *node) {
6670 const uint8_t *end = NULL;
6671
6672 switch (PM_NODE_TYPE(node)) {
6673 case PM_SYMBOL_NODE:
6674 end = ((pm_symbol_node_t *) node)->closing_loc.end;
6675 break;
6676 case PM_INTERPOLATED_SYMBOL_NODE:
6677 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
6678 break;
6679 default:
6680 return false;
6681 }
6682
6683 return (end != NULL) && (end[-1] == ':');
6684}
6685
6689static pm_symbol_node_t *
6690pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6691 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6692
6693 *new_node = (pm_symbol_node_t) {
6694 .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
6695 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6696 .value_loc = node->content_loc,
6697 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6698 .unescaped = node->unescaped
6699 };
6700
6701 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
6702 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6703
6704 // We are explicitly _not_ using pm_node_destroy here because we don't want
6705 // to trash the unescaped string. We could instead copy the string if we
6706 // know that it is owned, but we're taking the fast path for now.
6707 xfree(node);
6708
6709 return new_node;
6710}
6711
6715static pm_string_node_t *
6716pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6717 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
6718 pm_node_flags_t flags = 0;
6719
6720 switch (parser->frozen_string_literal) {
6721 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6722 flags = PM_STRING_FLAGS_MUTABLE;
6723 break;
6724 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6725 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6726 break;
6727 }
6728
6729 *new_node = (pm_string_node_t) {
6730 .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
6731 .opening_loc = node->opening_loc,
6732 .content_loc = node->value_loc,
6733 .closing_loc = node->closing_loc,
6734 .unescaped = node->unescaped
6735 };
6736
6737 // We are explicitly _not_ using pm_node_destroy here because we don't want
6738 // to trash the unescaped string. We could instead copy the string if we
6739 // know that it is owned, but we're taking the fast path for now.
6740 xfree(node);
6741
6742 return new_node;
6743}
6744
6748static pm_true_node_t *
6749pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6750 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6751 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6752
6753 *node = (pm_true_node_t) {
6754 .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6755 };
6756
6757 return node;
6758}
6759
6763static pm_true_node_t *
6764pm_true_node_synthesized_create(pm_parser_t *parser) {
6765 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6766
6767 *node = (pm_true_node_t) {
6768 .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
6769 };
6770
6771 return node;
6772}
6773
6777static pm_undef_node_t *
6778pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6779 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6780 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
6781
6782 *node = (pm_undef_node_t) {
6783 .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
6784 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
6785 .names = { 0 }
6786 };
6787
6788 return node;
6789}
6790
6794static void
6795pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
6796 node->base.location.end = name->location.end;
6797 pm_node_list_append(&node->names, name);
6798}
6799
6803static pm_unless_node_t *
6804pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6805 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6806
6807 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6808 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6809
6810 *node = (pm_unless_node_t) {
6811 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
6812 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6813 .predicate = predicate,
6814 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
6815 .statements = statements,
6816 .else_clause = NULL,
6817 .end_keyword_loc = { 0 }
6818 };
6819
6820 return node;
6821}
6822
6826static pm_unless_node_t *
6827pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6828 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6829 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6830
6831 pm_statements_node_t *statements = pm_statements_node_create(parser);
6832 pm_statements_node_body_append(parser, statements, statement, true);
6833
6834 *node = (pm_unless_node_t) {
6835 .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
6836 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
6837 .predicate = predicate,
6838 .then_keyword_loc = { 0 },
6839 .statements = statements,
6840 .else_clause = NULL,
6841 .end_keyword_loc = { 0 }
6842 };
6843
6844 return node;
6845}
6846
6847static inline void
6848pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
6849 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
6850 node->base.location.end = end_keyword->end;
6851}
6852
6858static void
6859pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6860 assert(parser->current_block_exits != NULL);
6861
6862 // All of the block exits that we want to remove should be within the
6863 // statements, and since we are modifying the statements, we shouldn't have
6864 // to check the end location.
6865 const uint8_t *start = statements->base.location.start;
6866
6867 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6868 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6869 if (block_exit->location.start < start) break;
6870
6871 // Implicitly remove from the list by lowering the size.
6872 parser->current_block_exits->size--;
6873 }
6874}
6875
6879static pm_until_node_t *
6880pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6881 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6882 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6883
6884 *node = (pm_until_node_t) {
6885 .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
6886 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6887 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6888 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6889 .predicate = predicate,
6890 .statements = statements
6891 };
6892
6893 return node;
6894}
6895
6899static pm_until_node_t *
6900pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6901 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6902 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6903 pm_loop_modifier_block_exits(parser, statements);
6904
6905 *node = (pm_until_node_t) {
6906 .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
6907 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6908 .do_keyword_loc = { 0 },
6909 .closing_loc = { 0 },
6910 .predicate = predicate,
6911 .statements = statements
6912 };
6913
6914 return node;
6915}
6916
6920static pm_when_node_t *
6921pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6922 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
6923
6924 *node = (pm_when_node_t) {
6925 .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
6926 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6927 .statements = NULL,
6928 .then_keyword_loc = { 0 },
6929 .conditions = { 0 }
6930 };
6931
6932 return node;
6933}
6934
6938static void
6939pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
6940 node->base.location.end = condition->location.end;
6941 pm_node_list_append(&node->conditions, condition);
6942}
6943
6947static inline void
6948pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
6949 node->base.location.end = then_keyword->end;
6950 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
6951}
6952
6956static void
6957pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6958 if (statements->base.location.end > node->base.location.end) {
6959 node->base.location.end = statements->base.location.end;
6960 }
6961
6962 node->statements = statements;
6963}
6964
6968static pm_while_node_t *
6969pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6970 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6971 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6972
6973 *node = (pm_while_node_t) {
6974 .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
6975 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6976 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6977 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6978 .predicate = predicate,
6979 .statements = statements
6980 };
6981
6982 return node;
6983}
6984
6988static pm_while_node_t *
6989pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6990 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6991 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6992 pm_loop_modifier_block_exits(parser, statements);
6993
6994 *node = (pm_while_node_t) {
6995 .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
6996 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6997 .do_keyword_loc = { 0 },
6998 .closing_loc = { 0 },
6999 .predicate = predicate,
7000 .statements = statements
7001 };
7002
7003 return node;
7004}
7005
7009static pm_while_node_t *
7010pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7011 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7012
7013 *node = (pm_while_node_t) {
7014 .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
7015 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7016 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7017 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7018 .predicate = predicate,
7019 .statements = statements
7020 };
7021
7022 return node;
7023}
7024
7029static pm_x_string_node_t *
7030pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7031 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7032
7033 *node = (pm_x_string_node_t) {
7034 .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
7035 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7036 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7037 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7038 .unescaped = *unescaped
7039 };
7040
7041 return node;
7042}
7043
7047static inline pm_x_string_node_t *
7048pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7049 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7050}
7051
7055static pm_yield_node_t *
7056pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7057 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7058
7059 const uint8_t *end;
7060 if (rparen_loc->start != NULL) {
7061 end = rparen_loc->end;
7062 } else if (arguments != NULL) {
7063 end = arguments->base.location.end;
7064 } else if (lparen_loc->start != NULL) {
7065 end = lparen_loc->end;
7066 } else {
7067 end = keyword->end;
7068 }
7069
7070 *node = (pm_yield_node_t) {
7071 .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
7072 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7073 .lparen_loc = *lparen_loc,
7074 .arguments = arguments,
7075 .rparen_loc = *rparen_loc
7076 };
7077
7078 return node;
7079}
7080
7085static int
7086pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7087 pm_scope_t *scope = parser->current_scope;
7088 int depth = 0;
7089
7090 while (scope != NULL) {
7091 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7092 if (scope->closed) break;
7093
7094 scope = scope->previous;
7095 depth++;
7096 }
7097
7098 return -1;
7099}
7100
7106static inline int
7107pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7108 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7109}
7110
7114static inline void
7115pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7116 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7117}
7118
7122static pm_constant_id_t
7123pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7124 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7125 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7126 return constant_id;
7127}
7128
7132static inline pm_constant_id_t
7133pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7134 return pm_parser_local_add_location(parser, token->start, token->end, reads);
7135}
7136
7140static pm_constant_id_t
7141pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7142 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7143 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7144 return constant_id;
7145}
7146
7150static pm_constant_id_t
7151pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7152 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7153 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7154 return constant_id;
7155}
7156
7164static bool
7165pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7166 // We want to check whether the parameter name is a numbered parameter or
7167 // not.
7168 pm_refute_numbered_parameter(parser, name->start, name->end);
7169
7170 // Otherwise we'll fetch the constant id for the parameter name and check
7171 // whether it's already in the current scope.
7172 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7173
7174 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7175 // Add an error if the parameter doesn't start with _ and has been seen before
7176 if ((name->start < name->end) && (*name->start != '_')) {
7177 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7178 }
7179 return true;
7180 }
7181 return false;
7182}
7183
7187static void
7188pm_parser_scope_pop(pm_parser_t *parser) {
7189 pm_scope_t *scope = parser->current_scope;
7190 parser->current_scope = scope->previous;
7191 pm_locals_free(&scope->locals);
7192 pm_node_list_free(&scope->implicit_parameters);
7193 xfree(scope);
7194}
7195
7196/******************************************************************************/
7197/* Stack helpers */
7198/******************************************************************************/
7199
7203static inline void
7204pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7205 *stack = (*stack << 1) | (value & 1);
7206}
7207
7211static inline void
7212pm_state_stack_pop(pm_state_stack_t *stack) {
7213 *stack >>= 1;
7214}
7215
7219static inline bool
7220pm_state_stack_p(const pm_state_stack_t *stack) {
7221 return *stack & 1;
7222}
7223
7224static inline void
7225pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7226 // Use the negation of the value to prevent stack overflow.
7227 pm_state_stack_push(&parser->accepts_block_stack, !value);
7228}
7229
7230static inline void
7231pm_accepts_block_stack_pop(pm_parser_t *parser) {
7232 pm_state_stack_pop(&parser->accepts_block_stack);
7233}
7234
7235static inline bool
7236pm_accepts_block_stack_p(pm_parser_t *parser) {
7237 return !pm_state_stack_p(&parser->accepts_block_stack);
7238}
7239
7240static inline void
7241pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7242 pm_state_stack_push(&parser->do_loop_stack, value);
7243}
7244
7245static inline void
7246pm_do_loop_stack_pop(pm_parser_t *parser) {
7247 pm_state_stack_pop(&parser->do_loop_stack);
7248}
7249
7250static inline bool
7251pm_do_loop_stack_p(pm_parser_t *parser) {
7252 return pm_state_stack_p(&parser->do_loop_stack);
7253}
7254
7255/******************************************************************************/
7256/* Lexer check helpers */
7257/******************************************************************************/
7258
7263static inline uint8_t
7264peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7265 if (cursor < parser->end) {
7266 return *cursor;
7267 } else {
7268 return '\0';
7269 }
7270}
7271
7277static inline uint8_t
7278peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7279 return peek_at(parser, parser->current.end + offset);
7280}
7281
7286static inline uint8_t
7287peek(const pm_parser_t *parser) {
7288 return peek_at(parser, parser->current.end);
7289}
7290
7295static inline bool
7296match(pm_parser_t *parser, uint8_t value) {
7297 if (peek(parser) == value) {
7298 parser->current.end++;
7299 return true;
7300 }
7301 return false;
7302}
7303
7308static inline size_t
7309match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7310 if (peek_at(parser, cursor) == '\n') {
7311 return 1;
7312 }
7313 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7314 return 2;
7315 }
7316 return 0;
7317}
7318
7324static inline size_t
7325match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7326 return match_eol_at(parser, parser->current.end + offset);
7327}
7328
7334static inline size_t
7335match_eol(pm_parser_t *parser) {
7336 return match_eol_at(parser, parser->current.end);
7337}
7338
/**
 * Return a pointer to the first "\n" within `length` bytes of the cursor, or
 * NULL when there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // Note that it's okay for us to use memchr here to look for \n because none
    // of the encodings that we support have \n as a component of a multi-byte
    // character.
    const void *found = memchr(cursor, '\n', span);
    return found;
}
7351
7355static inline bool
7356ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7357 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7358}
7359
7364static bool
7365parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7366 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7367
7368 if (encoding != NULL) {
7369 if (parser->encoding != encoding) {
7370 parser->encoding = encoding;
7371 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7372 }
7373
7374 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7375 return true;
7376 }
7377
7378 return false;
7379}
7380
/**
 * Search the current comment token for an encoding declaration: the word
 * "coding" (compared with pm_strncasecmp) followed by `=` or `:` and a value.
 * When a value is reached it is handed to
 * parser_lex_magic_comment_encoding_value; if that rejects the value, a
 * PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error is added over the value. Returns
 * without doing anything if the token ends before a value is reached.
 */
7385static void
7386parser_lex_magic_comment_encoding(pm_parser_t *parser) {
// Scanning begins one byte after the start of the token (presumably to skip
// the leading `#` -- TODO confirm) and is bounded by the end of the token.
7387 const uint8_t *cursor = parser->current.start + 1;
7388 const uint8_t *end = parser->current.end;
7389
// Whether a `=` or `:` has been seen directly after the candidate word.
7390 bool separator = false;
// First loop: locate "coding". Each iteration inspects the byte six positions
// ahead of the cursor and gives up once six or fewer bytes remain.
7391 while (true) {
7392 if (end - cursor <= 6) return;
7393 switch (cursor[6]) {
// A letter of "coding" advances the cursor by 6 (for c) down to 1 (for g) and
// retries without comparing.
7394 case 'C': case 'c': cursor += 6; continue;
7395 case 'O': case 'o': cursor += 5; continue;
7396 case 'D': case 'd': cursor += 4; continue;
7397 case 'I': case 'i': cursor += 3; continue;
7398 case 'N': case 'n': cursor += 2; continue;
7399 case 'G': case 'g': cursor += 1; continue;
// A separator is remembered, then the six bytes before it are compared below.
7400 case '=': case ':':
7401 separator = true;
7402 cursor += 6;
7403 break;
// Any other byte advances by six; only whitespace goes on to the comparison.
7404 default:
7405 cursor += 6;
7406 if (pm_char_is_whitespace(*cursor)) break;
7407 continue;
7408 }
// The six bytes just before the cursor must spell "coding"; otherwise forget
// any separator and keep scanning.
7409 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7410 separator = false;
7411 }
7412
// Second loop: advance past whitespace. If no separator has been seen yet, the
// next non-whitespace byte must be `=` or `:`, after which whitespace is
// skipped again. Reaching the end of the token at any point returns.
7413 while (true) {
7414 do {
7415 if (++cursor >= end) return;
7416 } while (pm_char_is_whitespace(*cursor));
7417
7418 if (separator) break;
7419 if (*cursor != '=' && *cursor != ':') return;
7420
7421 separator = true;
7422 cursor++;
7423 }
7424
// The value is the run of `-`, `_`, and bytes accepted by the current
// encoding's alnum_char, stopping at the end of the token.
7425 const uint8_t *value_start = cursor;
7426 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7427
7428 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7429 // If we were unable to parse the encoding value, then we've got an
7430 // issue because we didn't understand the encoding that the user was
7431 // trying to use. In this case we'll keep using the default encoding but
7432 // add an error to the parser to indicate an unsuccessful parse.
7433 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7434 }
7435}
7436
/**
 * The result of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither of the above. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
7442
7447static pm_magic_comment_boolean_value_t
7448parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7449 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7450 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7451 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7452 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7453 } else {
7454 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7455 }
7456}
7457
/**
 * Return true when the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7462
7468static inline const uint8_t *
7469parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7470 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7471 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7472 return cursor;
7473 }
7474 cursor++;
7475 }
7476 return NULL;
7477}
7478
/**
 * Scan the current comment token for magic comments written as `key: value`
 * pairs, optionally enclosed between two emacs-style `-*-` markers.
 *
 * Recognized keys are `encoding`/`coding` (only when the comment starts at
 * parser->encoding_comment_start and the encoding is not locked),
 * `warn_indent`, `frozen_string_literal` (ignored with a warning when
 * semantic_token_seen is true), and `shareable_constant_value`. Dashes in a
 * key are treated as underscores. Each key/value pair that is reached is also
 * appended to parser->magic_comment_list when the allocation succeeds.
 *
 * Returns false when the comment is too short, when an opening marker has no
 * closing marker, or, outside of markers, when the text is not a single
 * `key: value` pair. Otherwise returns true unless an encoding key was
 * handled, in which case the result of
 * parser_lex_magic_comment_encoding_value is returned.
 *
 * NOTE(review): the embedded line numbers skip in a few places inside this
 * function (after 7636, after 7639, after 7678), so some statements appear to
 * be missing from this listing: the bodies of the frozen_string_literal
 * FALSE/TRUE cases and the declaration of `magic_comment`.
 */
7489static inline bool
7490parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7491 bool result = true;
7492
// Scanning begins one byte after the start of the token (presumably to skip
// the leading `#` -- TODO confirm). Seven or fewer bytes cannot hold a magic
// comment.
7493 const uint8_t *start = parser->current.start + 1;
7494 const uint8_t *end = parser->current.end;
7495 if (end - start <= 7) return false;
7496
7497 const uint8_t *cursor;
// Set when the comment is enclosed in a pair of `-*-` markers.
7498 bool indicator = false;
7499
// When markers are present, narrow [start, end) to the text between them.
7500 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7501 start = cursor + 3;
7502
7503 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7504 end = cursor;
7505 indicator = true;
7506 } else {
7507 // If we have a start marker but not an end marker, then we cannot
7508 // have a magic comment.
7509 return false;
7510 }
7511 }
7512
// Each iteration attempts to consume one `key: value` pair.
7513 cursor = start;
7514 while (cursor < end) {
// Skip delimiters and whitespace in front of the key.
7515 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7516
// The key runs up to the next delimiter or whitespace byte.
7517 const uint8_t *key_start = cursor;
7518 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7519
7520 const uint8_t *key_end = cursor;
7521 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7522 if (cursor == end) break;
7523
// A colon must follow the key. Without one, a marker-enclosed comment moves on
// to the next candidate key; an unenclosed comment is not a magic comment.
7524 if (*cursor == ':') {
7525 cursor++;
7526 } else {
7527 if (!indicator) return false;
7528 continue;
7529 }
7530
7531 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7532 if (cursor == end) break;
7533
7534 const uint8_t *value_start;
7535 const uint8_t *value_end;
7536
// A value is either double-quoted (a backslash skips the byte after it) or a
// bare run ending at a double quote, a semicolon, or whitespace.
7537 if (*cursor == '"') {
7538 value_start = ++cursor;
7539 for (; cursor < end && *cursor != '"'; cursor++) {
7540 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7541 }
7542 value_end = cursor;
7543 if (cursor < end && *cursor == '"') cursor++;
7544 } else {
7545 value_start = cursor;
7546 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7547 value_end = cursor;
7548 }
7549
// Between markers, semicolons and whitespace separate pairs. Outside markers,
// only whitespace may follow the value, otherwise this is not a magic comment.
7550 if (indicator) {
7551 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7552 } else {
7553 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7554 if (cursor != end) return false;
7555 }
7556
7557 // Here, we need to do some processing on the key to swap out dashes for
7558 // underscores. We only need to do this if there _is_ a dash in the key.
7559 pm_string_t key;
7560 const size_t key_length = (size_t) (key_end - key_start);
7561 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7562
7563 if (dash == NULL) {
7564 pm_string_shared_init(&key, key_start, key_end);
7565 } else {
// Copy the key into a new buffer so the dashes can be rewritten. A failed
// allocation abandons the scan and returns the result so far.
7566 uint8_t *buffer = xmalloc(key_length);
7567 if (buffer == NULL) break;
7568
7569 memcpy(buffer, key_start, key_length);
7570 buffer[dash - key_start] = '_';
7571
7572 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7573 buffer[dash - key_start] = '_';
7574 }
7575
7576 pm_string_owned_init(&key, buffer, key_length);
7577 }
7578
7579 // Finally, we can start checking the key against the list of known
7580 // magic comment keys, and potentially change state based on that.
7581 const uint8_t *key_source = pm_string_source(&key);
7582 uint32_t value_length = (uint32_t) (value_end - value_start);
7583
7584 // We only want to attempt to compare against encoding comments if it's
7585 // the first line in the file (or the second in the case of a shebang).
7586 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7587 if (
7588 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7589 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7590 ) {
7591 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7592 }
7593 }
7594
// The remaining keys are told apart by length before their text is compared.
7595 if (key_length == 11) {
7596 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7597 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7598 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7599 PM_PARSER_WARN_TOKEN_FORMAT(
7600 parser,
7601 parser->current,
7602 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7603 (int) key_length,
7604 (const char *) key_source,
7605 (int) value_length,
7606 (const char *) value_start
7607 );
7608 break;
7609 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7610 parser->warn_mismatched_indentation = false;
7611 break;
7612 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7613 parser->warn_mismatched_indentation = true;
7614 break;
7615 }
7616 }
7617 } else if (key_length == 21) {
7618 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7619 // We only want to handle frozen string literal comments if it's
7620 // before any semantic tokens have been seen.
7621 if (semantic_token_seen) {
7622 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7623 } else {
7624 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7625 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7626 PM_PARSER_WARN_TOKEN_FORMAT(
7627 parser,
7628 parser->current,
7629 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7630 (int) key_length,
7631 (const char *) key_source,
7632 (int) value_length,
7633 (const char *) value_start
7634 );
7635 break;
// NOTE(review): the statements for the FALSE and TRUE cases are not present in
// this listing (embedded lines 7637 and 7640 are skipped).
7636 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7638 break;
7639 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7641 break;
7642 }
7643 }
7644 }
7645 } else if (key_length == 24) {
7646 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
// Walk back over spaces and tabs before the comment: it is only honored when
// nothing else precedes it on its line.
7647 const uint8_t *cursor = parser->current.start;
7648 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7649
7650 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7651 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7652 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7653 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7654 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7655 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7656 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7657 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7658 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7659 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7660 } else {
7661 PM_PARSER_WARN_TOKEN_FORMAT(
7662 parser,
7663 parser->current,
7664 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7665 (int) key_length,
7666 (const char *) key_source,
7667 (int) value_length,
7668 (const char *) value_start
7669 );
7670 }
7671 }
7672 }
7673
7674 // When we're done, we want to free the string in case we had to
7675 // allocate memory for it.
7676 pm_string_free(&key);
7677
7678 // Allocate a new magic comment node to append to the parser's list.
// NOTE(review): `magic_comment` is used below without a visible declaration
// (embedded line 7679 is skipped in this listing). The stored key and value
// pointers refer into the source text, not into the `key` string freed above.
7680 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7681 magic_comment->key_start = key_start;
7682 magic_comment->value_start = value_start;
7683 magic_comment->key_length = (uint32_t) key_length;
7684 magic_comment->value_length = value_length;
7685 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7686 }
7687 }
7688
7689 return result;
7690}
7691
7692/******************************************************************************/
7693/* Context manipulations */
7694/******************************************************************************/
7695
7696static const uint32_t context_terminators[] = {
7697 [PM_CONTEXT_NONE] = 0,
7698 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7699 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7700 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7701 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7702 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7703 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7704 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7705 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7706 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7707 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7708 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7709 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7710 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7711 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7712 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7713 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7714 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7715 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7716 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7717 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7718 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7719 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7720 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7721 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7722 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7723 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7724 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7725 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7726 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7727 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7728 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7729 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7730 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7731 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7732 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7733 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7734 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7735 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7736 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7737 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7738 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7739 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7740 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7741 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7742 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7743 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7744 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7745 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7746 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7747 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7748 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7749 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7750 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7751 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7752};
7753
7754static inline bool
7755context_terminator(pm_context_t context, pm_token_t *token) {
7756 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7757}
7758
7763static pm_context_t
7764context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7765 pm_context_node_t *context_node = parser->current_context;
7766
7767 while (context_node != NULL) {
7768 if (context_terminator(context_node->context, token)) return context_node->context;
7769 context_node = context_node->prev;
7770 }
7771
7772 return PM_CONTEXT_NONE;
7773}
7774
7775static bool
7776context_push(pm_parser_t *parser, pm_context_t context) {
7777 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7778 if (context_node == NULL) return false;
7779
7780 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7781
7782 if (parser->current_context == NULL) {
7783 parser->current_context = context_node;
7784 } else {
7785 context_node->prev = parser->current_context;
7786 parser->current_context = context_node;
7787 }
7788
7789 return true;
7790}
7791
7792static void
7793context_pop(pm_parser_t *parser) {
7794 pm_context_node_t *prev = parser->current_context->prev;
7795 xfree(parser->current_context);
7796 parser->current_context = prev;
7797}
7798
7799static bool
7800context_p(const pm_parser_t *parser, pm_context_t context) {
7801 pm_context_node_t *context_node = parser->current_context;
7802
7803 while (context_node != NULL) {
7804 if (context_node->context == context) return true;
7805 context_node = context_node->prev;
7806 }
7807
7808 return false;
7809}
7810
/**
 * Walk the context stack from the current context backwards. Returns true on
 * reaching a method definition context, and false on reaching a class,
 * module, or singleton class context or on running out of contexts. Any
 * other context is skipped.
 *
 * NOTE(review): the embedded line numbers skip between the case labels below,
 * so further case labels appear to be missing from this listing.
 */
7811static bool
7812context_def_p(const pm_parser_t *parser) {
7813 pm_context_node_t *context_node = parser->current_context;
7814
7815 while (context_node != NULL) {
7816 switch (context_node->context) {
7817 case PM_CONTEXT_DEF:
7822 return true;
7823 case PM_CONTEXT_CLASS:
7827 case PM_CONTEXT_MODULE:
7831 case PM_CONTEXT_SCLASS:
7835 return false;
// Every other context is skipped by moving to the enclosing one.
7836 default:
7837 context_node = context_node->prev;
7838 }
7839 }
7840
7841 return false;
7842}
7843
/**
 * Return a human-readable description of the given context as a string
 * literal. PM_CONTEXT_NONE is not expected here: it trips an assertion and
 * otherwise yields the empty string, as does any value not handled by the
 * switch.
 *
 * NOTE(review): the embedded line numbers skip in front of the 'else',
 * 'ensure', and 'rescue' entries, so the other case labels that share those
 * strings appear to be missing from this listing.
 */
7848static const char *
7849context_human(pm_context_t context) {
7850 switch (context) {
7851 case PM_CONTEXT_NONE:
7852 assert(false && "unreachable");
7853 return "";
7854 case PM_CONTEXT_BEGIN: return "begin statement";
7855 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7856 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7857 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7858 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7859 case PM_CONTEXT_CASE_IN: return "'in' clause";
7860 case PM_CONTEXT_CLASS: return "class definition";
7861 case PM_CONTEXT_DEF: return "method definition";
7862 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7863 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7864 case PM_CONTEXT_DEFINED: return "'defined?' expression";
7865 case PM_CONTEXT_ELSE:
7872 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7873 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7874 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7881 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7882 case PM_CONTEXT_FOR: return "for loop";
7883 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7884 case PM_CONTEXT_IF: return "if statement";
7885 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7886 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7887 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7888 case PM_CONTEXT_MAIN: return "top level context";
7889 case PM_CONTEXT_MODULE: return "module definition";
7890 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7891 case PM_CONTEXT_PARENS: return "parentheses";
7892 case PM_CONTEXT_POSTEXE: return "'END' block";
7893 case PM_CONTEXT_PREDICATE: return "predicate";
7894 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7902 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7903 case PM_CONTEXT_SCLASS: return "singleton class definition";
7904 case PM_CONTEXT_TERNARY: return "ternary expression";
7905 case PM_CONTEXT_UNLESS: return "unless statement";
7906 case PM_CONTEXT_UNTIL: return "until statement";
7907 case PM_CONTEXT_WHILE: return "while statement";
7908 }
7909
// Reached only when the context value matched none of the cases above.
7910 assert(false && "unreachable");
7911 return "";
7912}
7913
7914/******************************************************************************/
7915/* Specific token lexers */
7916/******************************************************************************/
7917
7918static inline void
7919pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7920 if (invalid != NULL) {
7921 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7922 pm_parser_err(parser, invalid, invalid + 1, diag_id);
7923 }
7924}
7925
7926static size_t
7927pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7928 const uint8_t *invalid = NULL;
7929 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7930 pm_strspn_number_validate(parser, string, length, invalid);
7931 return length;
7932}
7933
7934static size_t
7935pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7936 const uint8_t *invalid = NULL;
7937 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7938 pm_strspn_number_validate(parser, string, length, invalid);
7939 return length;
7940}
7941
7942static size_t
7943pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7944 const uint8_t *invalid = NULL;
7945 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7946 pm_strspn_number_validate(parser, string, length, invalid);
7947 return length;
7948}
7949
7950static size_t
7951pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7952 const uint8_t *invalid = NULL;
7953 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7954 pm_strspn_number_validate(parser, string, length, invalid);
7955 return length;
7956}
7957
7958static pm_token_type_t
7959lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7960 pm_token_type_t type = PM_TOKEN_INTEGER;
7961
7962 // Here we're going to attempt to parse the optional decimal portion of a
7963 // float. If it's not there, then it's okay and we'll just continue on.
7964 if (peek(parser) == '.') {
7965 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7966 parser->current.end += 2;
7967 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7968 type = PM_TOKEN_FLOAT;
7969 } else {
7970 // If we had a . and then something else, then it's not a float
7971 // suffix on a number it's a method call or something else.
7972 return type;
7973 }
7974 }
7975
7976 // Here we're going to attempt to parse the optional exponent portion of a
7977 // float. If it's not there, it's okay and we'll just continue on.
7978 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7979 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7980 parser->current.end += 2;
7981
7982 if (pm_char_is_decimal_digit(peek(parser))) {
7983 parser->current.end++;
7984 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7985 } else {
7986 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7987 }
7988 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7989 parser->current.end++;
7990 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7991 } else {
7992 return type;
7993 }
7994
7995 *seen_e = true;
7996 type = PM_TOKEN_FLOAT;
7997 }
7998
7999 return type;
8000}
8001
8002static pm_token_type_t
8003lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8004 pm_token_type_t type = PM_TOKEN_INTEGER;
8005 *seen_e = false;
8006
8007 if (peek_offset(parser, -1) == '0') {
8008 switch (*parser->current.end) {
8009 // 0d1111 is a decimal number
8010 case 'd':
8011 case 'D':
8012 parser->current.end++;
8013 if (pm_char_is_decimal_digit(peek(parser))) {
8014 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8015 } else {
8016 match(parser, '_');
8017 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8018 }
8019
8020 break;
8021
8022 // 0b1111 is a binary number
8023 case 'b':
8024 case 'B':
8025 parser->current.end++;
8026 if (pm_char_is_binary_digit(peek(parser))) {
8027 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8028 } else {
8029 match(parser, '_');
8030 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8031 }
8032
8033 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8034 break;
8035
8036 // 0o1111 is an octal number
8037 case 'o':
8038 case 'O':
8039 parser->current.end++;
8040 if (pm_char_is_octal_digit(peek(parser))) {
8041 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8042 } else {
8043 match(parser, '_');
8044 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8045 }
8046
8047 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8048 break;
8049
8050 // 01111 is an octal number
8051 case '_':
8052 case '0':
8053 case '1':
8054 case '2':
8055 case '3':
8056 case '4':
8057 case '5':
8058 case '6':
8059 case '7':
8060 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8061 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8062 break;
8063
8064 // 0x1111 is a hexadecimal number
8065 case 'x':
8066 case 'X':
8067 parser->current.end++;
8068 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8069 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8070 } else {
8071 match(parser, '_');
8072 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8073 }
8074
8075 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8076 break;
8077
8078 // 0.xxx is a float
8079 case '.': {
8080 type = lex_optional_float_suffix(parser, seen_e);
8081 break;
8082 }
8083
8084 // 0exxx is a float
8085 case 'e':
8086 case 'E': {
8087 type = lex_optional_float_suffix(parser, seen_e);
8088 break;
8089 }
8090 }
8091 } else {
8092 // If it didn't start with a 0, then we'll lex as far as we can into a
8093 // decimal number.
8094 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8095
8096 // Afterward, we'll lex as far as we can into an optional float suffix.
8097 type = lex_optional_float_suffix(parser, seen_e);
8098 }
8099
8100 // At this point we have a completed number, but we want to provide the user
8101 // with a good experience if they put an additional .xxx fractional
8102 // component on the end, so we'll check for that here.
8103 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8104 const uint8_t *fraction_start = parser->current.end;
8105 const uint8_t *fraction_end = parser->current.end + 2;
8106 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8107 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8108 }
8109
8110 return type;
8111}
8112
8113static pm_token_type_t
8114lex_numeric(pm_parser_t *parser) {
8115 pm_token_type_t type = PM_TOKEN_INTEGER;
8116 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8117
8118 if (parser->current.end < parser->end) {
8119 bool seen_e = false;
8120 type = lex_numeric_prefix(parser, &seen_e);
8121
8122 const uint8_t *end = parser->current.end;
8123 pm_token_type_t suffix_type = type;
8124
8125 if (type == PM_TOKEN_INTEGER) {
8126 if (match(parser, 'r')) {
8127 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8128
8129 if (match(parser, 'i')) {
8130 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8131 }
8132 } else if (match(parser, 'i')) {
8133 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8134 }
8135 } else {
8136 if (!seen_e && match(parser, 'r')) {
8137 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8138
8139 if (match(parser, 'i')) {
8140 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8141 }
8142 } else if (match(parser, 'i')) {
8143 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8144 }
8145 }
8146
8147 const uint8_t b = peek(parser);
8148 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8149 parser->current.end = end;
8150 } else {
8151 type = suffix_type;
8152 }
8153 }
8154
8155 return type;
8156}
8157
8158static pm_token_type_t
8159lex_global_variable(pm_parser_t *parser) {
8160 if (parser->current.end >= parser->end) {
8161 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8162 return PM_TOKEN_GLOBAL_VARIABLE;
8163 }
8164
8165 // True if multiple characters are allowed after the declaration of the
8166 // global variable. Not true when it starts with "$-".
8167 bool allow_multiple = true;
8168
8169 switch (*parser->current.end) {
8170 case '~': // $~: match-data
8171 case '*': // $*: argv
8172 case '$': // $$: pid
8173 case '?': // $?: last status
8174 case '!': // $!: error string
8175 case '@': // $@: error position
8176 case '/': // $/: input record separator
8177 case '\\': // $\: output record separator
8178 case ';': // $;: field separator
8179 case ',': // $,: output field separator
8180 case '.': // $.: last read line number
8181 case '=': // $=: ignorecase
8182 case ':': // $:: load path
8183 case '<': // $<: reading filename
8184 case '>': // $>: default output handle
8185 case '\"': // $": already loaded files
8186 parser->current.end++;
8187 return PM_TOKEN_GLOBAL_VARIABLE;
8188
8189 case '&': // $&: last match
8190 case '`': // $`: string before last match
8191 case '\'': // $': string after last match
8192 case '+': // $+: string matches last paren.
8193 parser->current.end++;
8194 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8195
8196 case '0': {
8197 parser->current.end++;
8198 size_t width;
8199
8200 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8201 do {
8202 parser->current.end += width;
8203 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8204
8205 // $0 isn't allowed to be followed by anything.
8206 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8207 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8208 }
8209
8210 return PM_TOKEN_GLOBAL_VARIABLE;
8211 }
8212
8213 case '1':
8214 case '2':
8215 case '3':
8216 case '4':
8217 case '5':
8218 case '6':
8219 case '7':
8220 case '8':
8221 case '9':
8222 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8223 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8224
8225 case '-':
8226 parser->current.end++;
8227 allow_multiple = false;
8229 default: {
8230 size_t width;
8231
8232 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8233 do {
8234 parser->current.end += width;
8235 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8236 } else if (pm_char_is_whitespace(peek(parser))) {
8237 // If we get here, then we have a $ followed by whitespace,
8238 // which is not allowed.
8239 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8240 } else {
8241 // If we get here, then we have a $ followed by something that
8242 // isn't recognized as a global variable.
8243 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8244 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8245 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
8246 }
8247
8248 return PM_TOKEN_GLOBAL_VARIABLE;
8249 }
8250 }
8251}
8252
8265static inline pm_token_type_t
8266lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8267 if (memcmp(current_start, value, vlen) == 0) {
8268 pm_lex_state_t last_state = parser->lex_state;
8269
8270 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8271 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8272 } else {
8273 lex_state_set(parser, state);
8274 if (state == PM_LEX_STATE_BEG) {
8275 parser->command_start = true;
8276 }
8277
8278 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8279 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8280 return modifier_type;
8281 }
8282 }
8283
8284 return type;
8285 }
8286
8287 return PM_TOKEN_EOF;
8288}
8289
8290static pm_token_type_t
8291lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8292 // Lex as far as we can into the current identifier.
8293 size_t width;
8294 const uint8_t *end = parser->end;
8295 const uint8_t *current_start = parser->current.start;
8296 const uint8_t *current_end = parser->current.end;
8297 bool encoding_changed = parser->encoding_changed;
8298
8299 if (encoding_changed) {
8300 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8301 current_end += width;
8302 }
8303 } else {
8304 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8305 current_end += width;
8306 }
8307 }
8308 parser->current.end = current_end;
8309
8310 // Now cache the length of the identifier so that we can quickly compare it
8311 // against known keywords.
8312 width = (size_t) (current_end - current_start);
8313
8314 if (current_end < end) {
8315 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8316 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8317 // check if we're returning the defined? keyword or just an identifier.
8318 width++;
8319
8320 if (
8321 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8322 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8323 ) {
8324 // If we're in a position where we can accept a : at the end of an
8325 // identifier, then we'll optionally accept it.
8326 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8327 (void) match(parser, ':');
8328 return PM_TOKEN_LABEL;
8329 }
8330
8331 if (parser->lex_state != PM_LEX_STATE_DOT) {
8332 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8333 return PM_TOKEN_KEYWORD_DEFINED;
8334 }
8335 }
8336
8337 return PM_TOKEN_METHOD_NAME;
8338 }
8339
8340 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8341 // If we're in a position where we can accept a = at the end of an
8342 // identifier, then we'll optionally accept it.
8343 return PM_TOKEN_IDENTIFIER;
8344 }
8345
8346 if (
8347 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8348 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8349 ) {
8350 // If we're in a position where we can accept a : at the end of an
8351 // identifier, then we'll optionally accept it.
8352 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8353 (void) match(parser, ':');
8354 return PM_TOKEN_LABEL;
8355 }
8356 }
8357
8358 if (parser->lex_state != PM_LEX_STATE_DOT) {
8359 pm_token_type_t type;
8360 switch (width) {
8361 case 2:
8362 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8363 if (pm_do_loop_stack_p(parser)) {
8364 return PM_TOKEN_KEYWORD_DO_LOOP;
8365 }
8366 return PM_TOKEN_KEYWORD_DO;
8367 }
8368
8369 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8370 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8371 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8372 break;
8373 case 3:
8374 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8375 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8376 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8377 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8378 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8379 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8380 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8381 break;
8382 case 4:
8383 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8384 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8385 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8386 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8387 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8388 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8389 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8390 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8391 break;
8392 case 5:
8393 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8394 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8395 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8396 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8397 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8398 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8399 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8400 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8401 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8402 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8403 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8404 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8405 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8406 break;
8407 case 6:
8408 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8409 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8410 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8411 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8412 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8413 break;
8414 case 8:
8415 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8416 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8417 break;
8418 case 12:
8419 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8420 break;
8421 }
8422 }
8423
8424 if (encoding_changed) {
8425 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8426 }
8427 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8428}
8429
8434static bool
8435current_token_starts_line(pm_parser_t *parser) {
8436 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8437}
8438
8453static pm_token_type_t
8454lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8455 // If there is no content following this #, then we're at the end of
8456 // the string and we can safely return string content.
8457 if (pound + 1 >= parser->end) {
8458 parser->current.end = pound + 1;
8459 return PM_TOKEN_STRING_CONTENT;
8460 }
8461
8462 // Now we'll check against the character that follows the #. If it constitutes
8463 // valid interplation, we'll handle that, otherwise we'll return
8464 // PM_TOKEN_NOT_PROVIDED.
8465 switch (pound[1]) {
8466 case '@': {
8467 // In this case we may have hit an embedded instance or class variable.
8468 if (pound + 2 >= parser->end) {
8469 parser->current.end = pound + 1;
8470 return PM_TOKEN_STRING_CONTENT;
8471 }
8472
8473 // If we're looking at a @ and there's another @, then we'll skip past the
8474 // second @.
8475 const uint8_t *variable = pound + 2;
8476 if (*variable == '@' && pound + 3 < parser->end) variable++;
8477
8478 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8479 // At this point we're sure that we've either hit an embedded instance
8480 // or class variable. In this case we'll first need to check if we've
8481 // already consumed content.
8482 if (pound > parser->current.start) {
8483 parser->current.end = pound;
8484 return PM_TOKEN_STRING_CONTENT;
8485 }
8486
8487 // Otherwise we need to return the embedded variable token
8488 // and then switch to the embedded variable lex mode.
8489 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8490 parser->current.end = pound + 1;
8491 return PM_TOKEN_EMBVAR;
8492 }
8493
8494 // If we didn't get a valid interpolation, then this is just regular
8495 // string content. This is like if we get "#@-". In this case the caller
8496 // should keep lexing.
8497 parser->current.end = pound + 1;
8498 return PM_TOKEN_NOT_PROVIDED;
8499 }
8500 case '$':
8501 // In this case we may have hit an embedded global variable. If there's
8502 // not enough room, then we'll just return string content.
8503 if (pound + 2 >= parser->end) {
8504 parser->current.end = pound + 1;
8505 return PM_TOKEN_STRING_CONTENT;
8506 }
8507
8508 // This is the character that we're going to check to see if it is the
8509 // start of an identifier that would indicate that this is a global
8510 // variable.
8511 const uint8_t *check = pound + 2;
8512
8513 if (pound[2] == '-') {
8514 if (pound + 3 >= parser->end) {
8515 parser->current.end = pound + 2;
8516 return PM_TOKEN_STRING_CONTENT;
8517 }
8518
8519 check++;
8520 }
8521
8522 // If the character that we're going to check is the start of an
8523 // identifier, or we don't have a - and the character is a decimal number
8524 // or a global name punctuation character, then we've hit an embedded
8525 // global variable.
8526 if (
8527 char_is_identifier_start(parser, check, parser->end - check) ||
8528 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8529 ) {
8530 // In this case we've hit an embedded global variable. First check to
8531 // see if we've already consumed content. If we have, then we need to
8532 // return that content as string content first.
8533 if (pound > parser->current.start) {
8534 parser->current.end = pound;
8535 return PM_TOKEN_STRING_CONTENT;
8536 }
8537
8538 // Otherwise, we need to return the embedded variable token and switch
8539 // to the embedded variable lex mode.
8540 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8541 parser->current.end = pound + 1;
8542 return PM_TOKEN_EMBVAR;
8543 }
8544
8545 // In this case we've hit a #$ that does not indicate a global variable.
8546 // In this case we'll continue lexing past it.
8547 parser->current.end = pound + 1;
8548 return PM_TOKEN_NOT_PROVIDED;
8549 case '{':
8550 // In this case it's the start of an embedded expression. If we have
8551 // already consumed content, then we need to return that content as string
8552 // content first.
8553 if (pound > parser->current.start) {
8554 parser->current.end = pound;
8555 return PM_TOKEN_STRING_CONTENT;
8556 }
8557
8558 parser->enclosure_nesting++;
8559
8560 // Otherwise we'll skip past the #{ and begin lexing the embedded
8561 // expression.
8562 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8563 parser->current.end = pound + 2;
8564 parser->command_start = true;
8565 pm_do_loop_stack_push(parser, false);
8566 return PM_TOKEN_EMBEXPR_BEGIN;
8567 default:
8568 // In this case we've hit a # that doesn't constitute interpolation. We'll
8569 // mark that by returning the not provided token type. This tells the
8570 // consumer to keep lexing forward.
8571 parser->current.end = pound + 1;
8572 return PM_TOKEN_NOT_PROVIDED;
8573 }
8574}
8575
/** No escape flags are set. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** The escaped byte is a control character: escape_byte masks it with 0x9f. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

/** The escaped byte is a meta character: escape_byte sets its 0x80 bit. */
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

/** The escape sequence appears in a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

/** The escape sequence appears in a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
8581
/**
 * Lookup table indexed by ASCII byte value (0x00-0x7F). An entry is true for
 * the bytes 0x09-0x0D and for 0x20-0x7E with the exception of the backslash
 * (0x5C); every other entry is false.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is marked in ascii_printable_chars. Bytes outside
 * the ASCII range (>= 0x80) are never considered printable.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
8600
/**
 * Convert a hexadecimal digit character to its numeric value. Bytes up to
 * '9' are treated as decimal digits; anything above is treated as a letter
 * whose low three bits give its offset from 9 (so 'a'/'A' map to 10 and
 * 'f'/'F' map to 15). The input is not validated.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    return (uint8_t) ((value & 0x7) + 9);
}
8609
8615static inline uint32_t
8616escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
8617 uint32_t value = 0;
8618 for (size_t index = 0; index < length; index++) {
8619 if (index != 0) value <<= 4;
8620 value |= escape_hexadecimal_digit(string[index]);
8621 }
8622
8623 // Here we're going to verify that the value is actually a valid Unicode
8624 // codepoint and not a surrogate pair.
8625 if (value >= 0xD800 && value <= 0xDFFF) {
8626 if (error_location != NULL) {
8627 pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
8628 } else {
8629 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8630 }
8631 return 0xFFFD;
8632 }
8633
8634 return value;
8635}
8636
8640static inline uint8_t
8641escape_byte(uint8_t value, const uint8_t flags) {
8642 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8643 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8644 return value;
8645}
8646
8650static inline void
8651escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8652 // \u escape sequences in string-like structures implicitly change the
8653 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8654 // literal.
8655 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8656 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8657 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8658 }
8659
8661 }
8662
8663 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8664 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
8665 pm_buffer_append_byte(buffer, 0xEF);
8666 pm_buffer_append_byte(buffer, 0xBF);
8667 pm_buffer_append_byte(buffer, 0xBD);
8668 }
8669}
8670
8675static inline void
8676escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8677 if (byte >= 0x80) {
8678 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8679 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8680 }
8681
8682 parser->explicit_encoding = parser->encoding;
8683 }
8684
8685 pm_buffer_append_byte(buffer, byte);
8686}
8687
8703static inline void
8704escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8705 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8706 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8707 }
8708
8709 escape_write_byte_encoded(parser, buffer, byte);
8710}
8711
8715static inline void
8716escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8717 size_t width;
8718 if (parser->encoding_changed) {
8719 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8720 } else {
8721 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8722 }
8723
8724 if (width == 1) {
8725 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8726 } else if (width > 1) {
8727 // Valid multibyte character. Just ignore escape.
8728 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8729 pm_buffer_append_bytes(b, parser->current.end, width);
8730 parser->current.end += width;
8731 } else {
8732 // Assume the next character wasn't meant to be part of this escape
8733 // sequence since it is invalid. Add an error and move on.
8734 parser->current.end++;
8735 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8736 }
8737}
8738
8744static void
8745escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8746#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8747
8748 PM_PARSER_WARN_TOKEN_FORMAT(
8749 parser,
8750 parser->current,
8751 PM_WARN_INVALID_CHARACTER,
8752 FLAG(flags),
8753 FLAG(flag),
8754 type
8755 );
8756
8757#undef FLAG
8758}
8759
8763static void
8764escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8765 uint8_t peeked = peek(parser);
8766 switch (peeked) {
8767 case '\\': {
8768 parser->current.end++;
8769 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8770 return;
8771 }
8772 case '\'': {
8773 parser->current.end++;
8774 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8775 return;
8776 }
8777 case 'a': {
8778 parser->current.end++;
8779 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8780 return;
8781 }
8782 case 'b': {
8783 parser->current.end++;
8784 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8785 return;
8786 }
8787 case 'e': {
8788 parser->current.end++;
8789 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8790 return;
8791 }
8792 case 'f': {
8793 parser->current.end++;
8794 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8795 return;
8796 }
8797 case 'n': {
8798 parser->current.end++;
8799 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8800 return;
8801 }
8802 case 'r': {
8803 parser->current.end++;
8804 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8805 return;
8806 }
8807 case 's': {
8808 parser->current.end++;
8809 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8810 return;
8811 }
8812 case 't': {
8813 parser->current.end++;
8814 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8815 return;
8816 }
8817 case 'v': {
8818 parser->current.end++;
8819 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8820 return;
8821 }
8822 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8823 uint8_t value = (uint8_t) (*parser->current.end - '0');
8824 parser->current.end++;
8825
8826 if (pm_char_is_octal_digit(peek(parser))) {
8827 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8828 parser->current.end++;
8829
8830 if (pm_char_is_octal_digit(peek(parser))) {
8831 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8832 parser->current.end++;
8833 }
8834 }
8835
8836 value = escape_byte(value, flags);
8837 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8838 return;
8839 }
8840 case 'x': {
8841 const uint8_t *start = parser->current.end - 1;
8842
8843 parser->current.end++;
8844 uint8_t byte = peek(parser);
8845
8846 if (pm_char_is_hexadecimal_digit(byte)) {
8847 uint8_t value = escape_hexadecimal_digit(byte);
8848 parser->current.end++;
8849
8850 byte = peek(parser);
8851 if (pm_char_is_hexadecimal_digit(byte)) {
8852 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8853 parser->current.end++;
8854 }
8855
8856 value = escape_byte(value, flags);
8857 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8858 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8859 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8860 } else {
8861 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8862 }
8863 }
8864
8865 escape_write_byte_encoded(parser, buffer, value);
8866 } else {
8867 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8868 }
8869
8870 return;
8871 }
8872 case 'u': {
8873 const uint8_t *start = parser->current.end - 1;
8874 parser->current.end++;
8875
8876 if (parser->current.end == parser->end) {
8877 const uint8_t *start = parser->current.end - 2;
8878 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8879 } else if (peek(parser) == '{') {
8880 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8881 parser->current.end++;
8882
8883 size_t whitespace;
8884 while (true) {
8885 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8886 parser->current.end += whitespace;
8887 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8888 // This is super hacky, but it gets us nicer error
8889 // messages because we can still pass it off to the
8890 // regular expression engine even if we hit an
8891 // unterminated regular expression.
8892 parser->current.end += 2;
8893 } else {
8894 break;
8895 }
8896 }
8897
8898 const uint8_t *extra_codepoints_start = NULL;
8899 int codepoints_count = 0;
8900
8901 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8902 const uint8_t *unicode_start = parser->current.end;
8903 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8904
8905 if (hexadecimal_length > 6) {
8906 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8907 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8908 } else if (hexadecimal_length == 0) {
8909 // there are not hexadecimal characters
8910
8911 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8912 // If this is a regular expression, we are going to
8913 // let the regular expression engine handle this
8914 // error instead of us.
8915 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8916 } else {
8917 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
8918 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8919 }
8920
8921 return;
8922 }
8923
8924 parser->current.end += hexadecimal_length;
8925 codepoints_count++;
8926 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8927 extra_codepoints_start = unicode_start;
8928 }
8929
8930 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8931 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8932
8933 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8934 }
8935
8936 // ?\u{nnnn} character literal should contain only one codepoint
8937 // and cannot be like ?\u{nnnn mmmm}.
8938 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8939 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8940 }
8941
8942 if (parser->current.end == parser->end) {
8943 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8944 } else if (peek(parser) == '}') {
8945 parser->current.end++;
8946 } else {
8947 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8948 // If this is a regular expression, we are going to let
8949 // the regular expression engine handle this error
8950 // instead of us.
8951 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8952 } else {
8953 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8954 }
8955 }
8956
8957 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8958 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8959 }
8960 } else {
8961 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8962
8963 if (length == 0) {
8964 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8965 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8966 } else {
8967 const uint8_t *start = parser->current.end - 2;
8968 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8969 }
8970 } else if (length == 4) {
8971 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
8972
8973 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8974 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8975 }
8976
8977 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8978 parser->current.end += 4;
8979 } else {
8980 parser->current.end += length;
8981
8982 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8983 // If this is a regular expression, we are going to let
8984 // the regular expression engine handle this error
8985 // instead of us.
8986 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8987 } else {
8988 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8989 }
8990 }
8991 }
8992
8993 return;
8994 }
8995 case 'c': {
8996 parser->current.end++;
8997 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8998 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8999 }
9000
9001 if (parser->current.end == parser->end) {
9002 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9003 return;
9004 }
9005
9006 uint8_t peeked = peek(parser);
9007 switch (peeked) {
9008 case '?': {
9009 parser->current.end++;
9010 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9011 return;
9012 }
9013 case '\\':
9014 parser->current.end++;
9015
9016 if (match(parser, 'u') || match(parser, 'U')) {
9017 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9018 return;
9019 }
9020
9021 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9022 return;
9023 case ' ':
9024 parser->current.end++;
9025 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9026 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9027 return;
9028 case '\t':
9029 parser->current.end++;
9030 escape_read_warn(parser, flags, 0, "\\t");
9031 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9032 return;
9033 default: {
9034 if (!char_is_ascii_printable(peeked)) {
9035 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9036 return;
9037 }
9038
9039 parser->current.end++;
9040 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9041 return;
9042 }
9043 }
9044 }
9045 case 'C': {
9046 parser->current.end++;
9047 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9048 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9049 }
9050
9051 if (peek(parser) != '-') {
9052 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9053 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9054 return;
9055 }
9056
9057 parser->current.end++;
9058 if (parser->current.end == parser->end) {
9059 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9060 return;
9061 }
9062
9063 uint8_t peeked = peek(parser);
9064 switch (peeked) {
9065 case '?': {
9066 parser->current.end++;
9067 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9068 return;
9069 }
9070 case '\\':
9071 parser->current.end++;
9072
9073 if (match(parser, 'u') || match(parser, 'U')) {
9074 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9075 return;
9076 }
9077
9078 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9079 return;
9080 case ' ':
9081 parser->current.end++;
9082 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9083 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9084 return;
9085 case '\t':
9086 parser->current.end++;
9087 escape_read_warn(parser, flags, 0, "\\t");
9088 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9089 return;
9090 default: {
9091 if (!char_is_ascii_printable(peeked)) {
9092 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9093 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9094 return;
9095 }
9096
9097 parser->current.end++;
9098 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9099 return;
9100 }
9101 }
9102 }
9103 case 'M': {
9104 parser->current.end++;
9105 if (flags & PM_ESCAPE_FLAG_META) {
9106 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9107 }
9108
9109 if (peek(parser) != '-') {
9110 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9111 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9112 return;
9113 }
9114
9115 parser->current.end++;
9116 if (parser->current.end == parser->end) {
9117 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9118 return;
9119 }
9120
9121 uint8_t peeked = peek(parser);
9122 switch (peeked) {
9123 case '\\':
9124 parser->current.end++;
9125
9126 if (match(parser, 'u') || match(parser, 'U')) {
9127 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9128 return;
9129 }
9130
9131 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9132 return;
9133 case ' ':
9134 parser->current.end++;
9135 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9136 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9137 return;
9138 case '\t':
9139 parser->current.end++;
9140 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9141 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9142 return;
9143 default:
9144 if (!char_is_ascii_printable(peeked)) {
9145 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9146 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9147 return;
9148 }
9149
9150 parser->current.end++;
9151 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9152 return;
9153 }
9154 }
9155 case '\r': {
9156 if (peek_offset(parser, 1) == '\n') {
9157 parser->current.end += 2;
9158 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9159 return;
9160 }
9162 }
9163 default: {
9164 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9165 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9166 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9167 return;
9168 }
9169 if (parser->current.end < parser->end) {
9170 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9171 } else {
9172 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9173 }
9174 return;
9175 }
9176 }
9177}
9178
9204static pm_token_type_t
9205lex_question_mark(pm_parser_t *parser) {
9206 if (lex_state_end_p(parser)) {
9207 lex_state_set(parser, PM_LEX_STATE_BEG);
9208 return PM_TOKEN_QUESTION_MARK;
9209 }
9210
9211 if (parser->current.end >= parser->end) {
9212 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9213 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9214 return PM_TOKEN_CHARACTER_LITERAL;
9215 }
9216
9217 if (pm_char_is_whitespace(*parser->current.end)) {
9218 lex_state_set(parser, PM_LEX_STATE_BEG);
9219 return PM_TOKEN_QUESTION_MARK;
9220 }
9221
9222 lex_state_set(parser, PM_LEX_STATE_BEG);
9223
9224 if (match(parser, '\\')) {
9225 lex_state_set(parser, PM_LEX_STATE_END);
9226
9227 pm_buffer_t buffer;
9228 pm_buffer_init_capacity(&buffer, 3);
9229
9230 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9231 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
9232
9233 return PM_TOKEN_CHARACTER_LITERAL;
9234 } else {
9235 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9236
9237 // Ternary operators can have a ? immediately followed by an identifier
9238 // which starts with an underscore. We check for this case here.
9239 if (
9240 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9241 (
9242 (parser->current.end + encoding_width >= parser->end) ||
9243 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9244 )
9245 ) {
9246 lex_state_set(parser, PM_LEX_STATE_END);
9247 parser->current.end += encoding_width;
9248 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9249 return PM_TOKEN_CHARACTER_LITERAL;
9250 }
9251 }
9252
9253 return PM_TOKEN_QUESTION_MARK;
9254}
9255
9260static pm_token_type_t
9261lex_at_variable(pm_parser_t *parser) {
9262 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9263 const uint8_t *end = parser->end;
9264
9265 size_t width;
9266 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9267 parser->current.end += width;
9268
9269 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9270 parser->current.end += width;
9271 }
9272 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9273 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9274 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9275 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9276 }
9277
9278 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9279 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9280 } else {
9281 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9282 pm_parser_err_token(parser, &parser->current, diag_id);
9283 }
9284
9285 // If we're lexing an embedded variable, then we need to pop back into the
9286 // parent lex context.
9287 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9288 lex_mode_pop(parser);
9289 }
9290
9291 return type;
9292}
9293
9297static inline void
9298parser_lex_callback(pm_parser_t *parser) {
9299 if (parser->lex_callback) {
9300 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9301 }
9302}
9303
9307static inline pm_comment_t *
9308parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9309 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9310 if (comment == NULL) return NULL;
9311
9312 *comment = (pm_comment_t) {
9313 .type = type,
9314 .location = { parser->current.start, parser->current.end }
9315 };
9316
9317 return comment;
9318}
9319
9325static pm_token_type_t
9326lex_embdoc(pm_parser_t *parser) {
9327 // First, lex out the EMBDOC_BEGIN token.
9328 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9329
9330 if (newline == NULL) {
9331 parser->current.end = parser->end;
9332 } else {
9333 pm_newline_list_append(&parser->newline_list, newline);
9334 parser->current.end = newline + 1;
9335 }
9336
9337 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9338 parser_lex_callback(parser);
9339
9340 // Now, create a comment that is going to be attached to the parser.
9341 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9342 if (comment == NULL) return PM_TOKEN_EOF;
9343
9344 // Now, loop until we find the end of the embedded documentation or the end
9345 // of the file.
9346 while (parser->current.end + 4 <= parser->end) {
9347 parser->current.start = parser->current.end;
9348
9349 // If we've hit the end of the embedded documentation then we'll return
9350 // that token here.
9351 if (
9352 (memcmp(parser->current.end, "=end", 4) == 0) &&
9353 (
9354 (parser->current.end + 4 == parser->end) || // end of file
9355 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9356 (parser->current.end[4] == '\0') || // NUL or end of script
9357 (parser->current.end[4] == '\004') || // ^D
9358 (parser->current.end[4] == '\032') // ^Z
9359 )
9360 ) {
9361 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9362
9363 if (newline == NULL) {
9364 parser->current.end = parser->end;
9365 } else {
9366 pm_newline_list_append(&parser->newline_list, newline);
9367 parser->current.end = newline + 1;
9368 }
9369
9370 parser->current.type = PM_TOKEN_EMBDOC_END;
9371 parser_lex_callback(parser);
9372
9373 comment->location.end = parser->current.end;
9374 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9375
9376 return PM_TOKEN_EMBDOC_END;
9377 }
9378
9379 // Otherwise, we'll parse until the end of the line and return a line of
9380 // embedded documentation.
9381 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9382
9383 if (newline == NULL) {
9384 parser->current.end = parser->end;
9385 } else {
9386 pm_newline_list_append(&parser->newline_list, newline);
9387 parser->current.end = newline + 1;
9388 }
9389
9390 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9391 parser_lex_callback(parser);
9392 }
9393
9394 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9395
9396 comment->location.end = parser->current.end;
9397 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9398
9399 return PM_TOKEN_EOF;
9400}
9401
9407static inline void
9408parser_lex_ignored_newline(pm_parser_t *parser) {
9409 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9410 parser_lex_callback(parser);
9411}
9412
9422static inline void
9423parser_flush_heredoc_end(pm_parser_t *parser) {
9424 assert(parser->heredoc_end <= parser->end);
9425 parser->next_start = parser->heredoc_end;
9426 parser->heredoc_end = NULL;
9427}
9428
9432static bool
9433parser_end_of_line_p(const pm_parser_t *parser) {
9434 const uint8_t *cursor = parser->current.end;
9435
9436 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9437 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9438 }
9439
9440 return true;
9441}
9442
9461typedef struct {
9467
9472 const uint8_t *cursor;
9474
9494
9498static inline void
9499pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9500 pm_buffer_append_byte(&token_buffer->buffer, byte);
9501}
9502
9503static inline void
9504pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9505 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9506}
9507
9511static inline size_t
9512parser_char_width(const pm_parser_t *parser) {
9513 size_t width;
9514 if (parser->encoding_changed) {
9515 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9516 } else {
9517 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9518 }
9519
9520 // TODO: If the character is invalid in the given encoding, then we'll just
9521 // push one byte into the buffer. This should actually be an error.
9522 return (width == 0 ? 1 : width);
9523}
9524
9528static void
9529pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9530 size_t width = parser_char_width(parser);
9531 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9532 parser->current.end += width;
9533}
9534
9535static void
9536pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9537 size_t width = parser_char_width(parser);
9538 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9539 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9540 parser->current.end += width;
9541}
9542
/**
 * Check whether every byte in the given slice is ASCII, that is, whether no
 * byte has its high bit set.
 *
 * @param value A pointer to the first byte of the slice.
 * @param length The number of bytes in the slice.
 * @return true if all bytes are below 0x80 (including for an empty slice),
 *     false otherwise.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length > 0) {
        if (*value >= 0x80) return false;

        value++;
        length--;
    }

    return true;
}
9551
9558static inline void
9559pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9560 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
9561}
9562
9563static inline void
9564pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9565 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
9566 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9567 pm_buffer_free(&token_buffer->regexp_buffer);
9568}
9569
9579static void
9580pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9581 if (token_buffer->cursor == NULL) {
9582 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9583 } else {
9584 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9585 pm_token_buffer_copy(parser, token_buffer);
9586 }
9587}
9588
9589static void
9590pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9591 if (token_buffer->base.cursor == NULL) {
9592 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9593 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9594 } else {
9595 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9596 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9597 pm_regexp_token_buffer_copy(parser, token_buffer);
9598 }
9599}
9600
9601#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9602
9611static void
9612pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9613 const uint8_t *start;
9614 if (token_buffer->cursor == NULL) {
9615 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9616 start = parser->current.start;
9617 } else {
9618 start = token_buffer->cursor;
9619 }
9620
9621 const uint8_t *end = parser->current.end - 1;
9622 assert(end >= start);
9623 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9624
9625 token_buffer->cursor = end;
9626}
9627
9628static void
9629pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9630 const uint8_t *start;
9631 if (token_buffer->base.cursor == NULL) {
9632 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9633 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9634 start = parser->current.start;
9635 } else {
9636 start = token_buffer->base.cursor;
9637 }
9638
9639 const uint8_t *end = parser->current.end - 1;
9640 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9641 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9642
9643 token_buffer->base.cursor = end;
9644}
9645
9646#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9647
9652static inline size_t
9653pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9654 size_t whitespace = 0;
9655
9656 switch (indent) {
9657 case PM_HEREDOC_INDENT_NONE:
9658 // Do nothing, we can't match a terminator with
9659 // indentation and there's no need to calculate common
9660 // whitespace.
9661 break;
9662 case PM_HEREDOC_INDENT_DASH:
9663 // Skip past inline whitespace.
9664 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9665 break;
9666 case PM_HEREDOC_INDENT_TILDE:
9667 // Skip past inline whitespace and calculate common
9668 // whitespace.
9669 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9670 if (**cursor == '\t') {
9671 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9672 } else {
9673 whitespace++;
9674 }
9675 (*cursor)++;
9676 }
9677
9678 break;
9679 }
9680
9681 return whitespace;
9682}
9683
9688static uint8_t
9689pm_lex_percent_delimiter(pm_parser_t *parser) {
9690 size_t eol_length = match_eol(parser);
9691
9692 if (eol_length) {
9693 if (parser->heredoc_end) {
9694 // If we have already lexed a heredoc, then the newline has already
9695 // been added to the list. In this case we want to just flush the
9696 // heredoc end.
9697 parser_flush_heredoc_end(parser);
9698 } else {
9699 // Otherwise, we'll add the newline to the list of newlines.
9700 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
9701 }
9702
9703 uint8_t delimiter = *parser->current.end;
9704
9705 // If our delimiter is \r\n, we want to treat it as if it's \n.
9706 // For example, %\r\nfoo\r\n should be "foo"
9707 if (eol_length == 2) {
9708 delimiter = *(parser->current.end + 1);
9709 }
9710
9711 parser->current.end += eol_length;
9712 return delimiter;
9713 }
9714
9715 return *parser->current.end++;
9716}
9717
9722#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9723
9730static void
9731parser_lex(pm_parser_t *parser) {
9732 assert(parser->current.end <= parser->end);
9733 parser->previous = parser->current;
9734
9735 // This value mirrors cmd_state from CRuby.
9736 bool previous_command_start = parser->command_start;
9737 parser->command_start = false;
9738
9739 // This is used to communicate to the newline lexing function that we've
9740 // already seen a comment.
9741 bool lexed_comment = false;
9742
9743 // Here we cache the current value of the semantic token seen flag. This is
9744 // used to reset it in case we find a token that shouldn't flip this flag.
9745 unsigned int semantic_token_seen = parser->semantic_token_seen;
9746 parser->semantic_token_seen = true;
9747
9748 switch (parser->lex_modes.current->mode) {
9749 case PM_LEX_DEFAULT:
9750 case PM_LEX_EMBEXPR:
9751 case PM_LEX_EMBVAR:
9752
9753 // We have a specific named label here because we are going to jump back to
9754 // this location in the event that we have lexed a token that should not be
9755 // returned to the parser. This includes comments, ignored newlines, and
9756 // invalid tokens of some form.
9757 lex_next_token: {
9758 // If we have the special next_start pointer set, then we're going to jump
9759 // to that location and start lexing from there.
9760 if (parser->next_start != NULL) {
9761 parser->current.end = parser->next_start;
9762 parser->next_start = NULL;
9763 }
9764
9765 // This value mirrors space_seen from CRuby. It tracks whether or not
9766 // space has been eaten before the start of the next token.
9767 bool space_seen = false;
9768
9769 // First, we're going to skip past any whitespace at the front of the next
9770 // token.
9771 bool chomping = true;
9772 while (parser->current.end < parser->end && chomping) {
9773 switch (*parser->current.end) {
9774 case ' ':
9775 case '\t':
9776 case '\f':
9777 case '\v':
9778 parser->current.end++;
9779 space_seen = true;
9780 break;
9781 case '\r':
9782 if (match_eol_offset(parser, 1)) {
9783 chomping = false;
9784 } else {
9785 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9786 parser->current.end++;
9787 space_seen = true;
9788 }
9789 break;
9790 case '\\': {
9791 size_t eol_length = match_eol_offset(parser, 1);
9792 if (eol_length) {
9793 if (parser->heredoc_end) {
9794 parser->current.end = parser->heredoc_end;
9795 parser->heredoc_end = NULL;
9796 } else {
9797 parser->current.end += eol_length + 1;
9798 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9799 space_seen = true;
9800 }
9801 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9802 parser->current.end += 2;
9803 } else {
9804 chomping = false;
9805 }
9806
9807 break;
9808 }
9809 default:
9810 chomping = false;
9811 break;
9812 }
9813 }
9814
9815 // Next, we'll set the start of this token to be the current end.
9816 parser->current.start = parser->current.end;
9817
9818 // We'll check if we're at the end of the file. If we are, then we
9819 // need to return the EOF token.
9820 if (parser->current.end >= parser->end) {
9821 // If we hit EOF, but the EOF came immediately after a newline,
9822 // set the start of the token to the newline. This way any EOF
9823 // errors will be reported as happening on that line rather than
9824 // a line after. For example "foo(\n" should report an error
9825 // on line 1 even though EOF technically occurs on line 2.
9826 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9827 parser->current.start -= 1;
9828 }
9829 LEX(PM_TOKEN_EOF);
9830 }
9831
9832 // Finally, we'll check the current character to determine the next
9833 // token.
9834 switch (*parser->current.end++) {
9835 case '\0': // NUL or end of script
9836 case '\004': // ^D
9837 case '\032': // ^Z
9838 parser->current.end--;
9839 LEX(PM_TOKEN_EOF);
9840
9841 case '#': { // comments
9842 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9843 parser->current.end = ending == NULL ? parser->end : ending;
9844
9845 // If we found a comment while lexing, then we're going to
9846 // add it to the list of comments in the file and keep
9847 // lexing.
9848 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9849 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9850
9851 if (ending) parser->current.end++;
9852 parser->current.type = PM_TOKEN_COMMENT;
9853 parser_lex_callback(parser);
9854
9855 // Here, parse the comment to see if it's a magic comment
9856 // and potentially change state on the parser.
9857 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9858 ptrdiff_t length = parser->current.end - parser->current.start;
9859
9860 // If we didn't find a magic comment within the first
9861 // pass and we're at the start of the file, then we need
9862 // to do another pass to potentially find other patterns
9863 // for encoding comments.
9864 if (length >= 10 && !parser->encoding_locked) {
9865 parser_lex_magic_comment_encoding(parser);
9866 }
9867 }
9868
9869 lexed_comment = true;
9870 }
9872 case '\r':
9873 case '\n': {
9874 parser->semantic_token_seen = semantic_token_seen & 0x1;
9875 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9876
9877 if (eol_length) {
9878 // The only way you can have carriage returns in this
9879 // particular loop is if you have a carriage return
9880 // followed by a newline. In that case we'll just skip
9881 // over the carriage return and continue lexing, in
9882 // order to make it so that the newline token
9883 // encapsulates both the carriage return and the
9884 // newline. Note that we need to check that we haven't
9885 // already lexed a comment here because that falls
9886 // through into here as well.
9887 if (!lexed_comment) {
9888 parser->current.end += eol_length - 1; // skip CR
9889 }
9890
9891 if (parser->heredoc_end == NULL) {
9892 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9893 }
9894 }
9895
9896 if (parser->heredoc_end) {
9897 parser_flush_heredoc_end(parser);
9898 }
9899
9900 // If this is an ignored newline, then we can continue lexing after
9901 // calling the callback with the ignored newline token.
9902 switch (lex_state_ignored_p(parser)) {
9903 case PM_IGNORED_NEWLINE_NONE:
9904 break;
9905 case PM_IGNORED_NEWLINE_PATTERN:
9906 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9907 if (!lexed_comment) parser_lex_ignored_newline(parser);
9908 lex_state_set(parser, PM_LEX_STATE_BEG);
9909 parser->command_start = true;
9910 parser->current.type = PM_TOKEN_NEWLINE;
9911 return;
9912 }
9914 case PM_IGNORED_NEWLINE_ALL:
9915 if (!lexed_comment) parser_lex_ignored_newline(parser);
9916 lexed_comment = false;
9917 goto lex_next_token;
9918 }
9919
9920 // Here we need to look ahead and see if there is a call operator
9921 // (either . or &.) that starts the next line. If there is, then this
9922 // is going to become an ignored newline and we're going to instead
9923 // return the call operator.
9924 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9925 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9926
9927 if (next_content < parser->end) {
9928 // If we hit a comment after a newline, then we're going to check
9929 // if it's ignored or if it's followed by a method call ('.').
9930 // If it is, then we're going to call the
9931 // callback with an ignored newline and then continue lexing.
9932 // Otherwise we'll return a regular newline.
9933 if (next_content[0] == '#') {
9934 // Here we look for a "." or "&." following a "\n".
9935 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9936
9937 while (following && (following + 1 < parser->end)) {
9938 following++;
9939 following += pm_strspn_inline_whitespace(following, parser->end - following);
9940
9941 // If this is not followed by a comment, then we can break out
9942 // of this loop.
9943 if (peek_at(parser, following) != '#') break;
9944
9945 // If there is a comment, then we need to find the end of the
9946 // comment and continue searching from there.
9947 following = next_newline(following, parser->end - following);
9948 }
9949
9950 // If the lex state was ignored, we will lex the
9951 // ignored newline.
9952 if (lex_state_ignored_p(parser)) {
9953 if (!lexed_comment) parser_lex_ignored_newline(parser);
9954 lexed_comment = false;
9955 goto lex_next_token;
9956 }
9957
9958 // If we hit a '.' or a '&.' we will lex the ignored
9959 // newline.
9960 if (following && (
9961 (peek_at(parser, following) == '.') ||
9962 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9963 )) {
9964 if (!lexed_comment) parser_lex_ignored_newline(parser);
9965 lexed_comment = false;
9966 goto lex_next_token;
9967 }
9968
9969
9970 // If we are parsing as CRuby 4.0 or later and we
9971 // hit a '&&', a '||', an 'and', or an 'or' then we
9972 // will lex the ignored newline.
9973 if (
9975 following && (
9976 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9977 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9978 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
9979 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
9980 )
9981 ) {
9982 if (!lexed_comment) parser_lex_ignored_newline(parser);
9983 lexed_comment = false;
9984 goto lex_next_token;
9985 }
9986 }
9987
9988 // If we hit a . after a newline, then we're in a call chain and
9989 // we need to return the call operator.
9990 if (next_content[0] == '.') {
9991 // To match ripper, we need to emit an ignored newline even though
9992 // it's a real newline in the case that we have a beginless range
9993 // on a subsequent line.
9994 if (peek_at(parser, next_content + 1) == '.') {
9995 if (!lexed_comment) parser_lex_ignored_newline(parser);
9996 lex_state_set(parser, PM_LEX_STATE_BEG);
9997 parser->command_start = true;
9998 parser->current.type = PM_TOKEN_NEWLINE;
9999 return;
10000 }
10001
10002 if (!lexed_comment) parser_lex_ignored_newline(parser);
10003 lex_state_set(parser, PM_LEX_STATE_DOT);
10004 parser->current.start = next_content;
10005 parser->current.end = next_content + 1;
10006 parser->next_start = NULL;
10007 LEX(PM_TOKEN_DOT);
10008 }
10009
10010 // If we hit a &. after a newline, then we're in a call chain and
10011 // we need to return the call operator.
10012 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10013 if (!lexed_comment) parser_lex_ignored_newline(parser);
10014 lex_state_set(parser, PM_LEX_STATE_DOT);
10015 parser->current.start = next_content;
10016 parser->current.end = next_content + 2;
10017 parser->next_start = NULL;
10018 LEX(PM_TOKEN_AMPERSAND_DOT);
10019 }
10020
10021 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10022 // If we hit an && then we are in a logical chain
10023 // and we need to return the logical operator.
10024 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10025 if (!lexed_comment) parser_lex_ignored_newline(parser);
10026 lex_state_set(parser, PM_LEX_STATE_BEG);
10027 parser->current.start = next_content;
10028 parser->current.end = next_content + 2;
10029 parser->next_start = NULL;
10030 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10031 }
10032
10033 // If we hit a || then we are in a logical chain and
10034 // we need to return the logical operator.
10035 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10036 if (!lexed_comment) parser_lex_ignored_newline(parser);
10037 lex_state_set(parser, PM_LEX_STATE_BEG);
10038 parser->current.start = next_content;
10039 parser->current.end = next_content + 2;
10040 parser->next_start = NULL;
10041 LEX(PM_TOKEN_PIPE_PIPE);
10042 }
10043
10044 // If we hit an 'and' then we are in a logical chain
10045 // and we need to return the logical operator.
10046 if (
10047 peek_at(parser, next_content) == 'a' &&
10048 peek_at(parser, next_content + 1) == 'n' &&
10049 peek_at(parser, next_content + 2) == 'd' &&
10050 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10051 ) {
10052 if (!lexed_comment) parser_lex_ignored_newline(parser);
10053 lex_state_set(parser, PM_LEX_STATE_BEG);
10054 parser->current.start = next_content;
10055 parser->current.end = next_content + 3;
10056 parser->next_start = NULL;
10057 parser->command_start = true;
10058 LEX(PM_TOKEN_KEYWORD_AND);
10059 }
10060
10061 // If we hit an 'or' then we are in a logical chain
10062 // and we need to return the logical operator.
10063 if (
10064 peek_at(parser, next_content) == 'o' &&
10065 peek_at(parser, next_content + 1) == 'r' &&
10066 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10067 ) {
10068 if (!lexed_comment) parser_lex_ignored_newline(parser);
10069 lex_state_set(parser, PM_LEX_STATE_BEG);
10070 parser->current.start = next_content;
10071 parser->current.end = next_content + 2;
10072 parser->next_start = NULL;
10073 parser->command_start = true;
10074 LEX(PM_TOKEN_KEYWORD_OR);
10075 }
10076 }
10077 }
10078
10079 // At this point we know this is a regular newline, and we can set the
10080 // necessary state and return the token.
10081 lex_state_set(parser, PM_LEX_STATE_BEG);
10082 parser->command_start = true;
10083 parser->current.type = PM_TOKEN_NEWLINE;
10084 if (!lexed_comment) parser_lex_callback(parser);
10085 return;
10086 }
10087
10088 // ,
10089 case ',':
10090 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10091 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10092 }
10093
10094 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10095 LEX(PM_TOKEN_COMMA);
10096
10097 // (
10098 case '(': {
10099 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10100
10101 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10102 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10103 }
10104
10105 parser->enclosure_nesting++;
10106 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10107 pm_do_loop_stack_push(parser, false);
10108 LEX(type);
10109 }
10110
10111 // )
10112 case ')':
10113 parser->enclosure_nesting--;
10114 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10115 pm_do_loop_stack_pop(parser);
10116 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10117
10118 // ;
10119 case ';':
10120 lex_state_set(parser, PM_LEX_STATE_BEG);
10121 parser->command_start = true;
10122 LEX(PM_TOKEN_SEMICOLON);
10123
10124 // [ [] []=
10125 case '[':
10126 parser->enclosure_nesting++;
10127 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10128
10129 if (lex_state_operator_p(parser)) {
10130 if (match(parser, ']')) {
10131 parser->enclosure_nesting--;
10132 lex_state_set(parser, PM_LEX_STATE_ARG);
10133 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10134 }
10135
10136 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10137 LEX(type);
10138 }
10139
10140 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10141 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10142 }
10143
10144 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10145 pm_do_loop_stack_push(parser, false);
10146 LEX(type);
10147
10148 // ]
10149 case ']':
10150 parser->enclosure_nesting--;
10151 lex_state_set(parser, PM_LEX_STATE_END);
10152 pm_do_loop_stack_pop(parser);
10153 LEX(PM_TOKEN_BRACKET_RIGHT);
10154
10155 // {
10156 case '{': {
10157 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10158
10159 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10160 // This { begins a lambda
10161 parser->command_start = true;
10162 lex_state_set(parser, PM_LEX_STATE_BEG);
10163 type = PM_TOKEN_LAMBDA_BEGIN;
10164 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10165 // This { begins a hash literal
10166 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10167 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10168 // This { begins a block
10169 parser->command_start = true;
10170 lex_state_set(parser, PM_LEX_STATE_BEG);
10171 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10172 // This { begins a block on a command
10173 parser->command_start = true;
10174 lex_state_set(parser, PM_LEX_STATE_BEG);
10175 } else {
10176 // This { begins a hash literal
10177 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10178 }
10179
10180 parser->enclosure_nesting++;
10181 parser->brace_nesting++;
10182 pm_do_loop_stack_push(parser, false);
10183
10184 LEX(type);
10185 }
10186
10187 // }
10188 case '}':
10189 parser->enclosure_nesting--;
10190 pm_do_loop_stack_pop(parser);
10191
10192 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10193 lex_mode_pop(parser);
10194 LEX(PM_TOKEN_EMBEXPR_END);
10195 }
10196
10197 parser->brace_nesting--;
10198 lex_state_set(parser, PM_LEX_STATE_END);
10199 LEX(PM_TOKEN_BRACE_RIGHT);
10200
10201 // * ** **= *=
10202 case '*': {
10203 if (match(parser, '*')) {
10204 if (match(parser, '=')) {
10205 lex_state_set(parser, PM_LEX_STATE_BEG);
10206 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10207 }
10208
10209 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10210
10211 if (lex_state_spcarg_p(parser, space_seen)) {
10212 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10213 type = PM_TOKEN_USTAR_STAR;
10214 } else if (lex_state_beg_p(parser)) {
10215 type = PM_TOKEN_USTAR_STAR;
10216 } else if (ambiguous_operator_p(parser, space_seen)) {
10217 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10218 }
10219
10220 if (lex_state_operator_p(parser)) {
10221 lex_state_set(parser, PM_LEX_STATE_ARG);
10222 } else {
10223 lex_state_set(parser, PM_LEX_STATE_BEG);
10224 }
10225
10226 LEX(type);
10227 }
10228
10229 if (match(parser, '=')) {
10230 lex_state_set(parser, PM_LEX_STATE_BEG);
10231 LEX(PM_TOKEN_STAR_EQUAL);
10232 }
10233
10234 pm_token_type_t type = PM_TOKEN_STAR;
10235
10236 if (lex_state_spcarg_p(parser, space_seen)) {
10237 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10238 type = PM_TOKEN_USTAR;
10239 } else if (lex_state_beg_p(parser)) {
10240 type = PM_TOKEN_USTAR;
10241 } else if (ambiguous_operator_p(parser, space_seen)) {
10242 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10243 }
10244
10245 if (lex_state_operator_p(parser)) {
10246 lex_state_set(parser, PM_LEX_STATE_ARG);
10247 } else {
10248 lex_state_set(parser, PM_LEX_STATE_BEG);
10249 }
10250
10251 LEX(type);
10252 }
10253
10254 // ! != !~ !@
10255 case '!':
10256 if (lex_state_operator_p(parser)) {
10257 lex_state_set(parser, PM_LEX_STATE_ARG);
10258 if (match(parser, '@')) {
10259 LEX(PM_TOKEN_BANG);
10260 }
10261 } else {
10262 lex_state_set(parser, PM_LEX_STATE_BEG);
10263 }
10264
10265 if (match(parser, '=')) {
10266 LEX(PM_TOKEN_BANG_EQUAL);
10267 }
10268
10269 if (match(parser, '~')) {
10270 LEX(PM_TOKEN_BANG_TILDE);
10271 }
10272
10273 LEX(PM_TOKEN_BANG);
10274
10275 // = => =~ == === =begin
10276 case '=':
10277 if (
10278 current_token_starts_line(parser) &&
10279 (parser->current.end + 5 <= parser->end) &&
10280 memcmp(parser->current.end, "begin", 5) == 0 &&
10281 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10282 ) {
10283 pm_token_type_t type = lex_embdoc(parser);
10284 if (type == PM_TOKEN_EOF) {
10285 LEX(type);
10286 }
10287
10288 goto lex_next_token;
10289 }
10290
10291 if (lex_state_operator_p(parser)) {
10292 lex_state_set(parser, PM_LEX_STATE_ARG);
10293 } else {
10294 lex_state_set(parser, PM_LEX_STATE_BEG);
10295 }
10296
10297 if (match(parser, '>')) {
10298 LEX(PM_TOKEN_EQUAL_GREATER);
10299 }
10300
10301 if (match(parser, '~')) {
10302 LEX(PM_TOKEN_EQUAL_TILDE);
10303 }
10304
10305 if (match(parser, '=')) {
10306 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10307 }
10308
10309 LEX(PM_TOKEN_EQUAL);
10310
10311 // < << <<= <= <=>
10312 case '<':
10313 if (match(parser, '<')) {
10314 if (
10315 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10316 !lex_state_end_p(parser) &&
10317 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10318 ) {
10319 const uint8_t *end = parser->current.end;
10320
10321 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10322 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10323
10324 if (match(parser, '-')) {
10325 indent = PM_HEREDOC_INDENT_DASH;
10326 }
10327 else if (match(parser, '~')) {
10328 indent = PM_HEREDOC_INDENT_TILDE;
10329 }
10330
10331 if (match(parser, '`')) {
10332 quote = PM_HEREDOC_QUOTE_BACKTICK;
10333 }
10334 else if (match(parser, '"')) {
10335 quote = PM_HEREDOC_QUOTE_DOUBLE;
10336 }
10337 else if (match(parser, '\'')) {
10338 quote = PM_HEREDOC_QUOTE_SINGLE;
10339 }
10340
10341 const uint8_t *ident_start = parser->current.end;
10342 size_t width = 0;
10343
10344 if (parser->current.end >= parser->end) {
10345 parser->current.end = end;
10346 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10347 parser->current.end = end;
10348 } else {
10349 if (quote == PM_HEREDOC_QUOTE_NONE) {
10350 parser->current.end += width;
10351
10352 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10353 parser->current.end += width;
10354 }
10355 } else {
10356 // If we have quotes, then we're going to go until we find the
10357 // end quote.
10358 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10359 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10360 parser->current.end++;
10361 }
10362 }
10363
10364 size_t ident_length = (size_t) (parser->current.end - ident_start);
10365 bool ident_error = false;
10366
10367 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10368 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10369 ident_error = true;
10370 }
10371
10372 parser->explicit_encoding = NULL;
10373 lex_mode_push(parser, (pm_lex_mode_t) {
10374 .mode = PM_LEX_HEREDOC,
10375 .as.heredoc = {
10376 .base = {
10377 .ident_start = ident_start,
10378 .ident_length = ident_length,
10379 .quote = quote,
10380 .indent = indent
10381 },
10382 .next_start = parser->current.end,
10383 .common_whitespace = NULL,
10384 .line_continuation = false
10385 }
10386 });
10387
10388 if (parser->heredoc_end == NULL) {
10389 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10390
10391 if (body_start == NULL) {
10392 // If there is no newline after the heredoc identifier, then
10393 // this is not a valid heredoc declaration. In this case we
10394 // will add an error, but we will still return a heredoc
10395 // start.
10396 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10397 body_start = parser->end;
10398 } else {
10399 // Otherwise, we want to indicate that the body of the
10400 // heredoc starts on the character after the next newline.
10401 pm_newline_list_append(&parser->newline_list, body_start);
10402 body_start++;
10403 }
10404
10405 parser->next_start = body_start;
10406 } else {
10407 parser->next_start = parser->heredoc_end;
10408 }
10409
10410 LEX(PM_TOKEN_HEREDOC_START);
10411 }
10412 }
10413
10414 if (match(parser, '=')) {
10415 lex_state_set(parser, PM_LEX_STATE_BEG);
10416 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10417 }
10418
10419 if (ambiguous_operator_p(parser, space_seen)) {
10420 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10421 }
10422
10423 if (lex_state_operator_p(parser)) {
10424 lex_state_set(parser, PM_LEX_STATE_ARG);
10425 } else {
10426 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10427 lex_state_set(parser, PM_LEX_STATE_BEG);
10428 }
10429
10430 LEX(PM_TOKEN_LESS_LESS);
10431 }
10432
10433 if (lex_state_operator_p(parser)) {
10434 lex_state_set(parser, PM_LEX_STATE_ARG);
10435 } else {
10436 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10437 lex_state_set(parser, PM_LEX_STATE_BEG);
10438 }
10439
10440 if (match(parser, '=')) {
10441 if (match(parser, '>')) {
10442 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10443 }
10444
10445 LEX(PM_TOKEN_LESS_EQUAL);
10446 }
10447
10448 LEX(PM_TOKEN_LESS);
10449
10450 // > >> >>= >=
10451 case '>':
10452 if (match(parser, '>')) {
10453 if (lex_state_operator_p(parser)) {
10454 lex_state_set(parser, PM_LEX_STATE_ARG);
10455 } else {
10456 lex_state_set(parser, PM_LEX_STATE_BEG);
10457 }
10458 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10459 }
10460
10461 if (lex_state_operator_p(parser)) {
10462 lex_state_set(parser, PM_LEX_STATE_ARG);
10463 } else {
10464 lex_state_set(parser, PM_LEX_STATE_BEG);
10465 }
10466
10467 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10468
10469 // double-quoted string literal
10470 case '"': {
10471 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10472 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10473 LEX(PM_TOKEN_STRING_BEGIN);
10474 }
10475
10476 // xstring literal
10477 case '`': {
10478 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10479 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10480 LEX(PM_TOKEN_BACKTICK);
10481 }
10482
10483 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10484 if (previous_command_start) {
10485 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10486 } else {
10487 lex_state_set(parser, PM_LEX_STATE_ARG);
10488 }
10489
10490 LEX(PM_TOKEN_BACKTICK);
10491 }
10492
10493 lex_mode_push_string(parser, true, false, '\0', '`');
10494 LEX(PM_TOKEN_BACKTICK);
10495 }
10496
10497 // single-quoted string literal
10498 case '\'': {
10499 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10500 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10501 LEX(PM_TOKEN_STRING_BEGIN);
10502 }
10503
10504 // ? character literal
10505 case '?':
10506 LEX(lex_question_mark(parser));
10507
10508 // & && &&= &=
10509 case '&': {
10510 if (match(parser, '&')) {
10511 lex_state_set(parser, PM_LEX_STATE_BEG);
10512
10513 if (match(parser, '=')) {
10514 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10515 }
10516
10517 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10518 }
10519
10520 if (match(parser, '=')) {
10521 lex_state_set(parser, PM_LEX_STATE_BEG);
10522 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10523 }
10524
10525 if (match(parser, '.')) {
10526 lex_state_set(parser, PM_LEX_STATE_DOT);
10527 LEX(PM_TOKEN_AMPERSAND_DOT);
10528 }
10529
10530 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10531 if (lex_state_spcarg_p(parser, space_seen)) {
10532 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10533 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10534 } else {
10535 const uint8_t delim = peek_offset(parser, 1);
10536
10537 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10538 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10539 }
10540 }
10541
10542 type = PM_TOKEN_UAMPERSAND;
10543 } else if (lex_state_beg_p(parser)) {
10544 type = PM_TOKEN_UAMPERSAND;
10545 } else if (ambiguous_operator_p(parser, space_seen)) {
10546 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10547 }
10548
10549 if (lex_state_operator_p(parser)) {
10550 lex_state_set(parser, PM_LEX_STATE_ARG);
10551 } else {
10552 lex_state_set(parser, PM_LEX_STATE_BEG);
10553 }
10554
10555 LEX(type);
10556 }
10557
10558 // | || ||= |=
10559 case '|':
10560 if (match(parser, '|')) {
10561 if (match(parser, '=')) {
10562 lex_state_set(parser, PM_LEX_STATE_BEG);
10563 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10564 }
10565
10566 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10567 parser->current.end--;
10568 LEX(PM_TOKEN_PIPE);
10569 }
10570
10571 lex_state_set(parser, PM_LEX_STATE_BEG);
10572 LEX(PM_TOKEN_PIPE_PIPE);
10573 }
10574
10575 if (match(parser, '=')) {
10576 lex_state_set(parser, PM_LEX_STATE_BEG);
10577 LEX(PM_TOKEN_PIPE_EQUAL);
10578 }
10579
10580 if (lex_state_operator_p(parser)) {
10581 lex_state_set(parser, PM_LEX_STATE_ARG);
10582 } else {
10583 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10584 }
10585
10586 LEX(PM_TOKEN_PIPE);
10587
10588 // + += +@
10589 case '+': {
10590 if (lex_state_operator_p(parser)) {
10591 lex_state_set(parser, PM_LEX_STATE_ARG);
10592
10593 if (match(parser, '@')) {
10594 LEX(PM_TOKEN_UPLUS);
10595 }
10596
10597 LEX(PM_TOKEN_PLUS);
10598 }
10599
10600 if (match(parser, '=')) {
10601 lex_state_set(parser, PM_LEX_STATE_BEG);
10602 LEX(PM_TOKEN_PLUS_EQUAL);
10603 }
10604
10605 if (
10606 lex_state_beg_p(parser) ||
10607 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10608 ) {
10609 lex_state_set(parser, PM_LEX_STATE_BEG);
10610
10611 if (pm_char_is_decimal_digit(peek(parser))) {
10612 parser->current.end++;
10613 pm_token_type_t type = lex_numeric(parser);
10614 lex_state_set(parser, PM_LEX_STATE_END);
10615 LEX(type);
10616 }
10617
10618 LEX(PM_TOKEN_UPLUS);
10619 }
10620
10621 if (ambiguous_operator_p(parser, space_seen)) {
10622 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10623 }
10624
10625 lex_state_set(parser, PM_LEX_STATE_BEG);
10626 LEX(PM_TOKEN_PLUS);
10627 }
10628
10629 // - -= -@
10630 case '-': {
10631 if (lex_state_operator_p(parser)) {
10632 lex_state_set(parser, PM_LEX_STATE_ARG);
10633
10634 if (match(parser, '@')) {
10635 LEX(PM_TOKEN_UMINUS);
10636 }
10637
10638 LEX(PM_TOKEN_MINUS);
10639 }
10640
10641 if (match(parser, '=')) {
10642 lex_state_set(parser, PM_LEX_STATE_BEG);
10643 LEX(PM_TOKEN_MINUS_EQUAL);
10644 }
10645
10646 if (match(parser, '>')) {
10647 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10648 LEX(PM_TOKEN_MINUS_GREATER);
10649 }
10650
10651 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10652 bool is_beg = lex_state_beg_p(parser);
10653 if (!is_beg && spcarg) {
10654 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10655 }
10656
10657 if (is_beg || spcarg) {
10658 lex_state_set(parser, PM_LEX_STATE_BEG);
10659 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10660 }
10661
10662 if (ambiguous_operator_p(parser, space_seen)) {
10663 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10664 }
10665
10666 lex_state_set(parser, PM_LEX_STATE_BEG);
10667 LEX(PM_TOKEN_MINUS);
10668 }
10669
10670 // . .. ...
10671 case '.': {
10672 bool beg_p = lex_state_beg_p(parser);
10673
10674 if (match(parser, '.')) {
10675 if (match(parser, '.')) {
10676 // If we're _not_ inside a range within default parameters
10677 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10678 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10679 lex_state_set(parser, PM_LEX_STATE_BEG);
10680 } else {
10681 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10682 }
10683 LEX(PM_TOKEN_UDOT_DOT_DOT);
10684 }
10685
10686 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10687 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10688 }
10689
10690 lex_state_set(parser, PM_LEX_STATE_BEG);
10691 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10692 }
10693
10694 lex_state_set(parser, PM_LEX_STATE_BEG);
10695 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10696 }
10697
10698 lex_state_set(parser, PM_LEX_STATE_DOT);
10699 LEX(PM_TOKEN_DOT);
10700 }
10701
10702 // integer
10703 case '0':
10704 case '1':
10705 case '2':
10706 case '3':
10707 case '4':
10708 case '5':
10709 case '6':
10710 case '7':
10711 case '8':
10712 case '9': {
10713 pm_token_type_t type = lex_numeric(parser);
10714 lex_state_set(parser, PM_LEX_STATE_END);
10715 LEX(type);
10716 }
10717
10718 // :: symbol
10719 case ':':
10720 if (match(parser, ':')) {
10721 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10722 lex_state_set(parser, PM_LEX_STATE_BEG);
10723 LEX(PM_TOKEN_UCOLON_COLON);
10724 }
10725
10726 lex_state_set(parser, PM_LEX_STATE_DOT);
10727 LEX(PM_TOKEN_COLON_COLON);
10728 }
10729
10730 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10731 lex_state_set(parser, PM_LEX_STATE_BEG);
10732 LEX(PM_TOKEN_COLON);
10733 }
10734
10735 if (peek(parser) == '"' || peek(parser) == '\'') {
10736 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10737 parser->current.end++;
10738 }
10739
10740 lex_state_set(parser, PM_LEX_STATE_FNAME);
10741 LEX(PM_TOKEN_SYMBOL_BEGIN);
10742
10743 // / /=
10744 case '/':
10745 if (lex_state_beg_p(parser)) {
10746 lex_mode_push_regexp(parser, '\0', '/');
10747 LEX(PM_TOKEN_REGEXP_BEGIN);
10748 }
10749
10750 if (match(parser, '=')) {
10751 lex_state_set(parser, PM_LEX_STATE_BEG);
10752 LEX(PM_TOKEN_SLASH_EQUAL);
10753 }
10754
10755 if (lex_state_spcarg_p(parser, space_seen)) {
10756 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10757 lex_mode_push_regexp(parser, '\0', '/');
10758 LEX(PM_TOKEN_REGEXP_BEGIN);
10759 }
10760
10761 if (ambiguous_operator_p(parser, space_seen)) {
10762 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10763 }
10764
10765 if (lex_state_operator_p(parser)) {
10766 lex_state_set(parser, PM_LEX_STATE_ARG);
10767 } else {
10768 lex_state_set(parser, PM_LEX_STATE_BEG);
10769 }
10770
10771 LEX(PM_TOKEN_SLASH);
10772
10773 // ^ ^=
10774 case '^':
10775 if (lex_state_operator_p(parser)) {
10776 lex_state_set(parser, PM_LEX_STATE_ARG);
10777 } else {
10778 lex_state_set(parser, PM_LEX_STATE_BEG);
10779 }
10780 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10781
10782 // ~ ~@
10783 case '~':
10784 if (lex_state_operator_p(parser)) {
10785 (void) match(parser, '@');
10786 lex_state_set(parser, PM_LEX_STATE_ARG);
10787 } else {
10788 lex_state_set(parser, PM_LEX_STATE_BEG);
10789 }
10790
10791 LEX(PM_TOKEN_TILDE);
10792
10793 // % %= %i %I %q %Q %w %W
10794 case '%': {
10795 // If there is no subsequent character then we have an
10796 // invalid token. We're going to say it's the percent
10797 // operator because we don't want to move into the string
10798 // lex mode unnecessarily.
10799 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10800 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10801 LEX(PM_TOKEN_PERCENT);
10802 }
10803
10804 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10805 lex_state_set(parser, PM_LEX_STATE_BEG);
10806 LEX(PM_TOKEN_PERCENT_EQUAL);
10807 } else if (
10808 lex_state_beg_p(parser) ||
10809 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10810 lex_state_spcarg_p(parser, space_seen)
10811 ) {
10812 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10813 if (*parser->current.end >= 0x80) {
10814 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10815 }
10816
10817 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10818 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10819 LEX(PM_TOKEN_STRING_BEGIN);
10820 }
10821
10822 // Delimiters for %-literals cannot be alphanumeric. We
10823 // validate that here.
10824 uint8_t delimiter = peek_offset(parser, 1);
10825 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10826 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10827 goto lex_next_token;
10828 }
10829
10830 switch (peek(parser)) {
10831 case 'i': {
10832 parser->current.end++;
10833
10834 if (parser->current.end < parser->end) {
10835 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10836 } else {
10837 lex_mode_push_list_eof(parser);
10838 }
10839
10840 LEX(PM_TOKEN_PERCENT_LOWER_I);
10841 }
10842 case 'I': {
10843 parser->current.end++;
10844
10845 if (parser->current.end < parser->end) {
10846 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10847 } else {
10848 lex_mode_push_list_eof(parser);
10849 }
10850
10851 LEX(PM_TOKEN_PERCENT_UPPER_I);
10852 }
10853 case 'r': {
10854 parser->current.end++;
10855
10856 if (parser->current.end < parser->end) {
10857 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10858 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10859 } else {
10860 lex_mode_push_regexp(parser, '\0', '\0');
10861 }
10862
10863 LEX(PM_TOKEN_REGEXP_BEGIN);
10864 }
10865 case 'q': {
10866 parser->current.end++;
10867
10868 if (parser->current.end < parser->end) {
10869 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10870 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10871 } else {
10872 lex_mode_push_string_eof(parser);
10873 }
10874
10875 LEX(PM_TOKEN_STRING_BEGIN);
10876 }
10877 case 'Q': {
10878 parser->current.end++;
10879
10880 if (parser->current.end < parser->end) {
10881 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10882 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10883 } else {
10884 lex_mode_push_string_eof(parser);
10885 }
10886
10887 LEX(PM_TOKEN_STRING_BEGIN);
10888 }
10889 case 's': {
10890 parser->current.end++;
10891
10892 if (parser->current.end < parser->end) {
10893 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10894 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10895 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10896 } else {
10897 lex_mode_push_string_eof(parser);
10898 }
10899
10900 LEX(PM_TOKEN_SYMBOL_BEGIN);
10901 }
10902 case 'w': {
10903 parser->current.end++;
10904
10905 if (parser->current.end < parser->end) {
10906 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10907 } else {
10908 lex_mode_push_list_eof(parser);
10909 }
10910
10911 LEX(PM_TOKEN_PERCENT_LOWER_W);
10912 }
10913 case 'W': {
10914 parser->current.end++;
10915
10916 if (parser->current.end < parser->end) {
10917 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10918 } else {
10919 lex_mode_push_list_eof(parser);
10920 }
10921
10922 LEX(PM_TOKEN_PERCENT_UPPER_W);
10923 }
10924 case 'x': {
10925 parser->current.end++;
10926
10927 if (parser->current.end < parser->end) {
10928 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10929 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10930 } else {
10931 lex_mode_push_string_eof(parser);
10932 }
10933
10934 LEX(PM_TOKEN_PERCENT_LOWER_X);
10935 }
10936 default:
10937 // If we get to this point, then we have a % that is completely
10938 // unparsable. In this case we'll just drop it from the parser
10939 // and skip past it and hope that the next token is something
10940 // that we can parse.
10941 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10942 goto lex_next_token;
10943 }
10944 }
10945
10946 if (ambiguous_operator_p(parser, space_seen)) {
10947 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10948 }
10949
10950 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10951 LEX(PM_TOKEN_PERCENT);
10952 }
10953
10954 // global variable
10955 case '$': {
10956 pm_token_type_t type = lex_global_variable(parser);
10957
10958 // If we're lexing an embedded variable, then we need to pop back into
10959 // the parent lex context.
10960 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10961 lex_mode_pop(parser);
10962 }
10963
10964 lex_state_set(parser, PM_LEX_STATE_END);
10965 LEX(type);
10966 }
10967
10968 // instance variable, class variable
10969 case '@':
10970 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10971 LEX(lex_at_variable(parser));
10972
10973 default: {
10974 if (*parser->current.start != '_') {
10975 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10976
10977 // If this isn't the beginning of an identifier, then
10978 // it's an invalid token as we've exhausted all of the
10979 // other options. We'll skip past it and return the next
10980 // token after adding an appropriate error message.
10981 if (!width) {
10982 if (*parser->current.start >= 0x80) {
10983 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
10984 } else if (*parser->current.start == '\\') {
10985 switch (peek_at(parser, parser->current.start + 1)) {
10986 case ' ':
10987 parser->current.end++;
10988 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
10989 break;
10990 case '\f':
10991 parser->current.end++;
10992 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
10993 break;
10994 case '\t':
10995 parser->current.end++;
10996 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
10997 break;
10998 case '\v':
10999 parser->current.end++;
11000 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11001 break;
11002 case '\r':
11003 if (peek_at(parser, parser->current.start + 2) != '\n') {
11004 parser->current.end++;
11005 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11006 break;
11007 }
11009 default:
11010 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11011 break;
11012 }
11013 } else if (char_is_ascii_printable(*parser->current.start)) {
11014 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11015 } else {
11016 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11017 }
11018
11019 goto lex_next_token;
11020 }
11021
11022 parser->current.end = parser->current.start + width;
11023 }
11024
11025 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11026
11027 // If we've hit a __END__ and it was at the start of the
11028 // line or the start of the file and it is followed by
11029 // either a \n or a \r\n, then this is the last token of the
11030 // file.
11031 if (
11032 ((parser->current.end - parser->current.start) == 7) &&
11033 current_token_starts_line(parser) &&
11034 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11035 (parser->current.end == parser->end || match_eol(parser))
11036 ) {
11037 // Since we know we're about to add an __END__ comment,
11038 // we know we need to add all of the newlines to get the
11039 // correct column information for it.
11040 const uint8_t *cursor = parser->current.end;
11041 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11042 pm_newline_list_append(&parser->newline_list, cursor++);
11043 }
11044
11045 parser->current.end = parser->end;
11046 parser->current.type = PM_TOKEN___END__;
11047 parser_lex_callback(parser);
11048
11049 parser->data_loc.start = parser->current.start;
11050 parser->data_loc.end = parser->current.end;
11051
11052 LEX(PM_TOKEN_EOF);
11053 }
11054
11055 pm_lex_state_t last_state = parser->lex_state;
11056
11057 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11058 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11059 if (previous_command_start) {
11060 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11061 } else {
11062 lex_state_set(parser, PM_LEX_STATE_ARG);
11063 }
11064 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11065 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11066 } else {
11067 lex_state_set(parser, PM_LEX_STATE_END);
11068 }
11069 }
11070
11071 if (
11072 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11073 (type == PM_TOKEN_IDENTIFIER) &&
11074 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11075 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11076 ) {
11077 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11078 }
11079
11080 LEX(type);
11081 }
11082 }
11083 }
11084 case PM_LEX_LIST: {
11085 if (parser->next_start != NULL) {
11086 parser->current.end = parser->next_start;
11087 parser->next_start = NULL;
11088 }
11089
11090 // First we'll set the beginning of the token.
11091 parser->current.start = parser->current.end;
11092
11093 // If there's any whitespace at the start of the list, then we're
11094 // going to trim it off the beginning and create a new token.
11095 size_t whitespace;
11096
11097 if (parser->heredoc_end) {
11098 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11099 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11100 whitespace += 1;
11101 }
11102 } else {
11103 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11104 }
11105
11106 if (whitespace > 0) {
11107 parser->current.end += whitespace;
11108 if (peek_offset(parser, -1) == '\n') {
11109 // mutates next_start
11110 parser_flush_heredoc_end(parser);
11111 }
11112 LEX(PM_TOKEN_WORDS_SEP);
11113 }
11114
11115 // We'll check if we're at the end of the file. If we are, then we
11116 // need to return the EOF token.
11117 if (parser->current.end >= parser->end) {
11118 LEX(PM_TOKEN_EOF);
11119 }
11120
11121 // Here we'll get a list of the places where strpbrk should break,
11122 // and then find the first one.
11123 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11124 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11125 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11126
11127 // If we haven't found an escape yet, then this buffer will be
11128 // unallocated since we can refer directly to the source string.
11129 pm_token_buffer_t token_buffer = { 0 };
11130
11131 while (breakpoint != NULL) {
11132 // If we hit whitespace, then we must have received content by
11133 // now, so we can return an element of the list.
11134 if (pm_char_is_whitespace(*breakpoint)) {
11135 parser->current.end = breakpoint;
11136 pm_token_buffer_flush(parser, &token_buffer);
11137 LEX(PM_TOKEN_STRING_CONTENT);
11138 }
11139
11140 // If we hit the terminator, we need to check which token to
11141 // return.
11142 if (*breakpoint == lex_mode->as.list.terminator) {
11143 // If this terminator doesn't actually close the list, then
11144 // we need to continue on past it.
11145 if (lex_mode->as.list.nesting > 0) {
11146 parser->current.end = breakpoint + 1;
11147 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11148 lex_mode->as.list.nesting--;
11149 continue;
11150 }
11151
11152 // If we've hit the terminator and we've already skipped
11153 // past content, then we can return a list node.
11154 if (breakpoint > parser->current.start) {
11155 parser->current.end = breakpoint;
11156 pm_token_buffer_flush(parser, &token_buffer);
11157 LEX(PM_TOKEN_STRING_CONTENT);
11158 }
11159
11160 // Otherwise, switch back to the default state and return
11161 // the end of the list.
11162 parser->current.end = breakpoint + 1;
11163 lex_mode_pop(parser);
11164 lex_state_set(parser, PM_LEX_STATE_END);
11165 LEX(PM_TOKEN_STRING_END);
11166 }
11167
11168 // If we hit a null byte, skip directly past it.
11169 if (*breakpoint == '\0') {
11170 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11171 continue;
11172 }
11173
11174 // If we hit escapes, then we need to treat the next token
11175 // literally. In this case we'll skip past the next character
11176 // and find the next breakpoint.
11177 if (*breakpoint == '\\') {
11178 parser->current.end = breakpoint + 1;
11179
11180 // If we've hit the end of the file, then break out of the
11181 // loop by setting the breakpoint to NULL.
11182 if (parser->current.end == parser->end) {
11183 breakpoint = NULL;
11184 continue;
11185 }
11186
11187 pm_token_buffer_escape(parser, &token_buffer);
11188 uint8_t peeked = peek(parser);
11189
11190 switch (peeked) {
11191 case ' ':
11192 case '\f':
11193 case '\t':
11194 case '\v':
11195 case '\\':
11196 pm_token_buffer_push_byte(&token_buffer, peeked);
11197 parser->current.end++;
11198 break;
11199 case '\r':
11200 parser->current.end++;
11201 if (peek(parser) != '\n') {
11202 pm_token_buffer_push_byte(&token_buffer, '\r');
11203 break;
11204 }
11206 case '\n':
11207 pm_token_buffer_push_byte(&token_buffer, '\n');
11208
11209 if (parser->heredoc_end) {
11210 // ... if we are on the same line as a heredoc,
11211 // flush the heredoc and continue parsing after
11212 // heredoc_end.
11213 parser_flush_heredoc_end(parser);
11214 pm_token_buffer_copy(parser, &token_buffer);
11215 LEX(PM_TOKEN_STRING_CONTENT);
11216 } else {
11217 // ... else track the newline.
11218 pm_newline_list_append(&parser->newline_list, parser->current.end);
11219 }
11220
11221 parser->current.end++;
11222 break;
11223 default:
11224 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11225 pm_token_buffer_push_byte(&token_buffer, peeked);
11226 parser->current.end++;
11227 } else if (lex_mode->as.list.interpolation) {
11228 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11229 } else {
11230 pm_token_buffer_push_byte(&token_buffer, '\\');
11231 pm_token_buffer_push_escaped(&token_buffer, parser);
11232 }
11233
11234 break;
11235 }
11236
11237 token_buffer.cursor = parser->current.end;
11238 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11239 continue;
11240 }
11241
11242 // If we hit a #, then we will attempt to lex interpolation.
11243 if (*breakpoint == '#') {
11244 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11245
11246 if (type == PM_TOKEN_NOT_PROVIDED) {
11247 // If we haven't returned at this point then we had something
11248 // that looked like an interpolated class or instance variable
11249 // like "#@" but wasn't actually. In this case we'll just skip
11250 // to the next breakpoint.
11251 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11252 continue;
11253 }
11254
11255 if (type == PM_TOKEN_STRING_CONTENT) {
11256 pm_token_buffer_flush(parser, &token_buffer);
11257 }
11258
11259 LEX(type);
11260 }
11261
11262 // If we've hit the incrementor, then we need to skip past it
11263 // and find the next breakpoint.
11264 assert(*breakpoint == lex_mode->as.list.incrementor);
11265 parser->current.end = breakpoint + 1;
11266 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11267 lex_mode->as.list.nesting++;
11268 continue;
11269 }
11270
11271 if (parser->current.end > parser->current.start) {
11272 pm_token_buffer_flush(parser, &token_buffer);
11273 LEX(PM_TOKEN_STRING_CONTENT);
11274 }
11275
11276 // If we were unable to find a breakpoint, then this token hits the
11277 // end of the file.
11278 parser->current.end = parser->end;
11279 pm_token_buffer_flush(parser, &token_buffer);
11280 LEX(PM_TOKEN_STRING_CONTENT);
11281 }
11282 case PM_LEX_REGEXP: {
11283 // First, we'll set the start of this token to be the current end.
11284 if (parser->next_start == NULL) {
11285 parser->current.start = parser->current.end;
11286 } else {
11287 parser->current.start = parser->next_start;
11288 parser->current.end = parser->next_start;
11289 parser->next_start = NULL;
11290 }
11291
11292 // We'll check if we're at the end of the file. If we are, then we
11293 // need to return the EOF token.
11294 if (parser->current.end >= parser->end) {
11295 LEX(PM_TOKEN_EOF);
11296 }
11297
11298 // Get a reference to the current mode.
11299 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11300
11301 // These are the places where we need to split up the content of the
11302 // regular expression. We'll use strpbrk to find the first of these
11303 // characters.
11304 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11305 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11306 pm_regexp_token_buffer_t token_buffer = { 0 };
11307
11308 while (breakpoint != NULL) {
11309 uint8_t term = lex_mode->as.regexp.terminator;
11310 bool is_terminator = (*breakpoint == term);
11311
11312 // If the terminator is newline, we need to consider \r\n _also_ a newline
11313 // For example: `%r\nfoo\r\n`
11314 // The regexp should be /foo/, not /foo\r/
11315 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11316 if (term == '\n') {
11317 is_terminator = true;
11318 }
11319
11320 // If the terminator is a CR, but we see a CRLF, we need to
11321 // treat the CRLF as a newline, meaning this is _not_ the
11322 // terminator
11323 if (term == '\r') {
11324 is_terminator = false;
11325 }
11326 }
11327
11328 // If we hit the terminator, we need to determine what kind of
11329 // token to return.
11330 if (is_terminator) {
11331 if (lex_mode->as.regexp.nesting > 0) {
11332 parser->current.end = breakpoint + 1;
11333 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11334 lex_mode->as.regexp.nesting--;
11335 continue;
11336 }
11337
11338 // Here we've hit the terminator. If we have already consumed
11339 // content then we need to return that content as string content
11340 // first.
11341 if (breakpoint > parser->current.start) {
11342 parser->current.end = breakpoint;
11343 pm_regexp_token_buffer_flush(parser, &token_buffer);
11344 LEX(PM_TOKEN_STRING_CONTENT);
11345 }
11346
11347 // Check here if we need to track the newline.
11348 size_t eol_length = match_eol_at(parser, breakpoint);
11349 if (eol_length) {
11350 parser->current.end = breakpoint + eol_length;
11351
11352 // Track the newline if we're not in a heredoc that
11353 // would have already have added the newline to the
11354 // list.
11355 if (parser->heredoc_end == NULL) {
11356 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11357 }
11358 } else {
11359 parser->current.end = breakpoint + 1;
11360 }
11361
11362 // Since we've hit the terminator of the regular expression,
11363 // we now need to parse the options.
11364 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11365
11366 lex_mode_pop(parser);
11367 lex_state_set(parser, PM_LEX_STATE_END);
11368 LEX(PM_TOKEN_REGEXP_END);
11369 }
11370
11371 // If we've hit the incrementor, then we need to skip past it
11372 // and find the next breakpoint.
11373 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11374 parser->current.end = breakpoint + 1;
11375 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11376 lex_mode->as.regexp.nesting++;
11377 continue;
11378 }
11379
11380 switch (*breakpoint) {
11381 case '\0':
11382 // If we hit a null byte, skip directly past it.
11383 parser->current.end = breakpoint + 1;
11384 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11385 break;
11386 case '\r':
11387 if (peek_at(parser, breakpoint + 1) != '\n') {
11388 parser->current.end = breakpoint + 1;
11389 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11390 break;
11391 }
11392
11393 breakpoint++;
11394 parser->current.end = breakpoint;
11395 pm_regexp_token_buffer_escape(parser, &token_buffer);
11396 token_buffer.base.cursor = breakpoint;
11397
11399 case '\n':
11400 // If we've hit a newline, then we need to track that in
11401 // the list of newlines.
11402 if (parser->heredoc_end == NULL) {
11403 pm_newline_list_append(&parser->newline_list, breakpoint);
11404 parser->current.end = breakpoint + 1;
11405 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11406 break;
11407 }
11408
11409 parser->current.end = breakpoint + 1;
11410 parser_flush_heredoc_end(parser);
11411 pm_regexp_token_buffer_flush(parser, &token_buffer);
11412 LEX(PM_TOKEN_STRING_CONTENT);
11413 case '\\': {
11414 // If we hit escapes, then we need to treat the next
11415 // token literally. In this case we'll skip past the
11416 // next character and find the next breakpoint.
11417 parser->current.end = breakpoint + 1;
11418
11419 // If we've hit the end of the file, then break out of
11420 // the loop by setting the breakpoint to NULL.
11421 if (parser->current.end == parser->end) {
11422 breakpoint = NULL;
11423 break;
11424 }
11425
11426 pm_regexp_token_buffer_escape(parser, &token_buffer);
11427 uint8_t peeked = peek(parser);
11428
11429 switch (peeked) {
11430 case '\r':
11431 parser->current.end++;
11432 if (peek(parser) != '\n') {
11433 if (lex_mode->as.regexp.terminator != '\r') {
11434 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11435 }
11436 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11437 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11438 break;
11439 }
11441 case '\n':
11442 if (parser->heredoc_end) {
11443 // ... if we are on the same line as a heredoc,
11444 // flush the heredoc and continue parsing after
11445 // heredoc_end.
11446 parser_flush_heredoc_end(parser);
11447 pm_regexp_token_buffer_copy(parser, &token_buffer);
11448 LEX(PM_TOKEN_STRING_CONTENT);
11449 } else {
11450 // ... else track the newline.
11451 pm_newline_list_append(&parser->newline_list, parser->current.end);
11452 }
11453
11454 parser->current.end++;
11455 break;
11456 case 'c':
11457 case 'C':
11458 case 'M':
11459 case 'u':
11460 case 'x':
11461 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11462 break;
11463 default:
11464 if (lex_mode->as.regexp.terminator == peeked) {
11465 // Some characters when they are used as the
11466 // terminator also receive an escape. They are
11467 // enumerated here.
11468 switch (peeked) {
11469 case '$': case ')': case '*': case '+':
11470 case '.': case '>': case '?': case ']':
11471 case '^': case '|': case '}':
11472 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11473 break;
11474 default:
11475 break;
11476 }
11477
11478 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11479 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11480 parser->current.end++;
11481 break;
11482 }
11483
11484 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11485 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11486 break;
11487 }
11488
11489 token_buffer.base.cursor = parser->current.end;
11490 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11491 break;
11492 }
11493 case '#': {
11494 // If we hit a #, then we will attempt to lex
11495 // interpolation.
11496 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11497
11498 if (type == PM_TOKEN_NOT_PROVIDED) {
11499 // If we haven't returned at this point then we had
11500 // something that looked like an interpolated class or
11501 // instance variable like "#@" but wasn't actually. In
11502 // this case we'll just skip to the next breakpoint.
11503 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11504 break;
11505 }
11506
11507 if (type == PM_TOKEN_STRING_CONTENT) {
11508 pm_regexp_token_buffer_flush(parser, &token_buffer);
11509 }
11510
11511 LEX(type);
11512 }
11513 default:
11514 assert(false && "unreachable");
11515 break;
11516 }
11517 }
11518
11519 if (parser->current.end > parser->current.start) {
11520 pm_regexp_token_buffer_flush(parser, &token_buffer);
11521 LEX(PM_TOKEN_STRING_CONTENT);
11522 }
11523
11524 // If we were unable to find a breakpoint, then this token hits the
11525 // end of the file.
11526 parser->current.end = parser->end;
11527 pm_regexp_token_buffer_flush(parser, &token_buffer);
11528 LEX(PM_TOKEN_STRING_CONTENT);
11529 }
11530 case PM_LEX_STRING: {
11531 // First, we'll set the start of this token to be the current end.
11532 if (parser->next_start == NULL) {
11533 parser->current.start = parser->current.end;
11534 } else {
11535 parser->current.start = parser->next_start;
11536 parser->current.end = parser->next_start;
11537 parser->next_start = NULL;
11538 }
11539
11540 // We'll check if we're at the end of the file. If we are, then we need to
11541 // return the EOF token.
11542 if (parser->current.end >= parser->end) {
11543 LEX(PM_TOKEN_EOF);
11544 }
11545
11546 // These are the places where we need to split up the content of the
11547 // string. We'll use strpbrk to find the first of these characters.
11548 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11549 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11550 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11551
11552 // If we haven't found an escape yet, then this buffer will be
11553 // unallocated since we can refer directly to the source string.
11554 pm_token_buffer_t token_buffer = { 0 };
11555
11556 while (breakpoint != NULL) {
11557 // If we hit the incrementor, then we'll increment the nesting and
11558 // continue lexing.
11559 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11560 lex_mode->as.string.nesting++;
11561 parser->current.end = breakpoint + 1;
11562 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11563 continue;
11564 }
11565
11566 uint8_t term = lex_mode->as.string.terminator;
11567 bool is_terminator = (*breakpoint == term);
11568
11569 // If the terminator is newline, we need to consider \r\n _also_ a newline
11570 // For example: `%\nfoo\r\n`
11571 // The string should be "foo", not "foo\r"
11572 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11573 if (term == '\n') {
11574 is_terminator = true;
11575 }
11576
11577 // If the terminator is a CR, but we see a CRLF, we need to
11578 // treat the CRLF as a newline, meaning this is _not_ the
11579 // terminator
11580 if (term == '\r') {
11581 is_terminator = false;
11582 }
11583 }
11584
11585 // Note that we have to check the terminator here first because we could
11586 // potentially be parsing a % string that has a # character as the
11587 // terminator.
11588 if (is_terminator) {
11589 // If this terminator doesn't actually close the string, then we need
11590 // to continue on past it.
11591 if (lex_mode->as.string.nesting > 0) {
11592 parser->current.end = breakpoint + 1;
11593 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11594 lex_mode->as.string.nesting--;
11595 continue;
11596 }
11597
11598 // Here we've hit the terminator. If we have already consumed content
11599 // then we need to return that content as string content first.
11600 if (breakpoint > parser->current.start) {
11601 parser->current.end = breakpoint;
11602 pm_token_buffer_flush(parser, &token_buffer);
11603 LEX(PM_TOKEN_STRING_CONTENT);
11604 }
11605
11606 // Otherwise we need to switch back to the parent lex mode and
11607 // return the end of the string.
11608 size_t eol_length = match_eol_at(parser, breakpoint);
11609 if (eol_length) {
11610 parser->current.end = breakpoint + eol_length;
11611
11612 // Track the newline if we're not in a heredoc that
11613 // would have already added the newline to the
11614 // list.
11615 if (parser->heredoc_end == NULL) {
11616 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11617 }
11618 } else {
11619 parser->current.end = breakpoint + 1;
11620 }
11621
11622 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11623 parser->current.end++;
11624 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11625 lex_mode_pop(parser);
11626 LEX(PM_TOKEN_LABEL_END);
11627 }
11628
11629 // When the delimiter itself is a newline, we won't
11630 // get a chance to flush heredocs in the usual places since
11631 // the newline is already consumed.
11632 if (term == '\n' && parser->heredoc_end) {
11633 parser_flush_heredoc_end(parser);
11634 }
11635
11636 lex_state_set(parser, PM_LEX_STATE_END);
11637 lex_mode_pop(parser);
11638 LEX(PM_TOKEN_STRING_END);
11639 }
11640
11641 switch (*breakpoint) {
11642 case '\0':
11643 // Skip directly past the null character.
11644 parser->current.end = breakpoint + 1;
11645 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11646 break;
11647 case '\r':
11648 if (peek_at(parser, breakpoint + 1) != '\n') {
11649 parser->current.end = breakpoint + 1;
11650 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11651 break;
11652 }
11653
11654 // If we hit a \r\n sequence, then we need to treat it
11655 // as a newline.
11656 breakpoint++;
11657 parser->current.end = breakpoint;
11658 pm_token_buffer_escape(parser, &token_buffer);
11659 token_buffer.cursor = breakpoint;
11660
11662 case '\n':
11663 // When we hit a newline, we need to flush any potential
11664 // heredocs. Note that this has to happen after we check
11665 // for the terminator in case the terminator is a
11666 // newline character.
11667 if (parser->heredoc_end == NULL) {
11668 pm_newline_list_append(&parser->newline_list, breakpoint);
11669 parser->current.end = breakpoint + 1;
11670 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11671 break;
11672 }
11673
11674 parser->current.end = breakpoint + 1;
11675 parser_flush_heredoc_end(parser);
11676 pm_token_buffer_flush(parser, &token_buffer);
11677 LEX(PM_TOKEN_STRING_CONTENT);
11678 case '\\': {
11679 // Here we hit escapes.
11680 parser->current.end = breakpoint + 1;
11681
11682 // If we've hit the end of the file, then break out of
11683 // the loop by setting the breakpoint to NULL.
11684 if (parser->current.end == parser->end) {
11685 breakpoint = NULL;
11686 continue;
11687 }
11688
11689 pm_token_buffer_escape(parser, &token_buffer);
11690 uint8_t peeked = peek(parser);
11691
11692 switch (peeked) {
11693 case '\\':
11694 pm_token_buffer_push_byte(&token_buffer, '\\');
11695 parser->current.end++;
11696 break;
11697 case '\r':
11698 parser->current.end++;
11699 if (peek(parser) != '\n') {
11700 if (!lex_mode->as.string.interpolation) {
11701 pm_token_buffer_push_byte(&token_buffer, '\\');
11702 }
11703 pm_token_buffer_push_byte(&token_buffer, '\r');
11704 break;
11705 }
11707 case '\n':
11708 if (!lex_mode->as.string.interpolation) {
11709 pm_token_buffer_push_byte(&token_buffer, '\\');
11710 pm_token_buffer_push_byte(&token_buffer, '\n');
11711 }
11712
11713 if (parser->heredoc_end) {
11714 // ... if we are on the same line as a heredoc,
11715 // flush the heredoc and continue parsing after
11716 // heredoc_end.
11717 parser_flush_heredoc_end(parser);
11718 pm_token_buffer_copy(parser, &token_buffer);
11719 LEX(PM_TOKEN_STRING_CONTENT);
11720 } else {
11721 // ... else track the newline.
11722 pm_newline_list_append(&parser->newline_list, parser->current.end);
11723 }
11724
11725 parser->current.end++;
11726 break;
11727 default:
11728 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11729 pm_token_buffer_push_byte(&token_buffer, peeked);
11730 parser->current.end++;
11731 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11732 pm_token_buffer_push_byte(&token_buffer, peeked);
11733 parser->current.end++;
11734 } else if (lex_mode->as.string.interpolation) {
11735 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11736 } else {
11737 pm_token_buffer_push_byte(&token_buffer, '\\');
11738 pm_token_buffer_push_escaped(&token_buffer, parser);
11739 }
11740
11741 break;
11742 }
11743
11744 token_buffer.cursor = parser->current.end;
11745 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11746 break;
11747 }
11748 case '#': {
11749 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11750
11751 if (type == PM_TOKEN_NOT_PROVIDED) {
11752 // If we haven't returned at this point then we had something that
11753 // looked like an interpolated class or instance variable like "#@"
11754 // but wasn't actually. In this case we'll just skip to the next
11755 // breakpoint.
11756 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11757 break;
11758 }
11759
11760 if (type == PM_TOKEN_STRING_CONTENT) {
11761 pm_token_buffer_flush(parser, &token_buffer);
11762 }
11763
11764 LEX(type);
11765 }
11766 default:
11767 assert(false && "unreachable");
11768 }
11769 }
11770
11771 if (parser->current.end > parser->current.start) {
11772 pm_token_buffer_flush(parser, &token_buffer);
11773 LEX(PM_TOKEN_STRING_CONTENT);
11774 }
11775
11776 // If we've hit the end of the string, then this is an unterminated
11777 // string. In that case we'll return a string content token.
11778 parser->current.end = parser->end;
11779 pm_token_buffer_flush(parser, &token_buffer);
11780 LEX(PM_TOKEN_STRING_CONTENT);
11781 }
11782 case PM_LEX_HEREDOC: {
11783 // First, we'll set to start of this token.
11784 if (parser->next_start == NULL) {
11785 parser->current.start = parser->current.end;
11786 } else {
11787 parser->current.start = parser->next_start;
11788 parser->current.end = parser->next_start;
11789 parser->heredoc_end = NULL;
11790 parser->next_start = NULL;
11791 }
11792
11793 // Now let's grab the information about the identifier off of the
11794 // current lex mode.
11795 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11796 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11797
11798 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11799 lex_mode->as.heredoc.line_continuation = false;
11800
11801 // We'll check if we're at the end of the file. If we are, then we
11802 // will add an error (because we weren't able to find the
11803 // terminator) but still continue parsing so that content after the
11804 // declaration of the heredoc can be parsed.
11805 if (parser->current.end >= parser->end) {
11806 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11807 parser->next_start = lex_mode->as.heredoc.next_start;
11808 parser->heredoc_end = parser->current.end;
11809 lex_state_set(parser, PM_LEX_STATE_END);
11810 lex_mode_pop(parser);
11811 LEX(PM_TOKEN_HEREDOC_END);
11812 }
11813
11814 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11815 size_t ident_length = heredoc_lex_mode->ident_length;
11816
11817 // If we are immediately following a newline and we have hit the
11818 // terminator, then we need to return the ending of the heredoc.
11819 if (current_token_starts_line(parser)) {
11820 const uint8_t *start = parser->current.start;
11821
11822 if (!line_continuation && (start + ident_length <= parser->end)) {
11823 const uint8_t *newline = next_newline(start, parser->end - start);
11824 const uint8_t *ident_end = newline;
11825 const uint8_t *terminator_end = newline;
11826
11827 if (newline == NULL) {
11828 terminator_end = parser->end;
11829 ident_end = parser->end;
11830 } else {
11831 terminator_end++;
11832 if (newline[-1] == '\r') {
11833 ident_end--; // Remove \r
11834 }
11835 }
11836
11837 const uint8_t *terminator_start = ident_end - ident_length;
11838 const uint8_t *cursor = start;
11839
11840 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11841 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11842 cursor++;
11843 }
11844 }
11845
11846 if (
11847 (cursor == terminator_start) &&
11848 (memcmp(terminator_start, ident_start, ident_length) == 0)
11849 ) {
11850 if (newline != NULL) {
11851 pm_newline_list_append(&parser->newline_list, newline);
11852 }
11853
11854 parser->current.end = terminator_end;
11855 if (*lex_mode->as.heredoc.next_start == '\\') {
11856 parser->next_start = NULL;
11857 } else {
11858 parser->next_start = lex_mode->as.heredoc.next_start;
11859 parser->heredoc_end = parser->current.end;
11860 }
11861
11862 lex_state_set(parser, PM_LEX_STATE_END);
11863 lex_mode_pop(parser);
11864 LEX(PM_TOKEN_HEREDOC_END);
11865 }
11866 }
11867
11868 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11869 if (
11870 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11871 lex_mode->as.heredoc.common_whitespace != NULL &&
11872 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11873 peek_at(parser, start) != '\n'
11874 ) {
11875 *lex_mode->as.heredoc.common_whitespace = whitespace;
11876 }
11877 }
11878
11879 // Otherwise we'll be parsing string content. These are the places
11880 // where we need to split up the content of the heredoc. We'll use
11881 // strpbrk to find the first of these characters.
11882 uint8_t breakpoints[] = "\r\n\\#";
11883
11884 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11885 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11886 breakpoints[3] = '\0';
11887 }
11888
11889 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11890 pm_token_buffer_t token_buffer = { 0 };
11891 bool was_line_continuation = false;
11892
11893 while (breakpoint != NULL) {
11894 switch (*breakpoint) {
11895 case '\0':
11896 // Skip directly past the null character.
11897 parser->current.end = breakpoint + 1;
11898 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11899 break;
11900 case '\r':
11901 parser->current.end = breakpoint + 1;
11902
11903 if (peek_at(parser, breakpoint + 1) != '\n') {
11904 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11905 break;
11906 }
11907
11908 // If we hit a \r\n sequence, then we want to replace it
11909 // with a single \n character in the final string.
11910 breakpoint++;
11911 pm_token_buffer_escape(parser, &token_buffer);
11912 token_buffer.cursor = breakpoint;
11913
11915 case '\n': {
11916 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11917 parser_flush_heredoc_end(parser);
11918 parser->current.end = breakpoint + 1;
11919 pm_token_buffer_flush(parser, &token_buffer);
11920 LEX(PM_TOKEN_STRING_CONTENT);
11921 }
11922
11923 pm_newline_list_append(&parser->newline_list, breakpoint);
11924
11925 // If we have a - or ~ heredoc, then we can match after
11926 // some leading whitespace.
11927 const uint8_t *start = breakpoint + 1;
11928
11929 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11930 // We want to match the terminator starting from the end of the line in case
11931 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11932 const uint8_t *newline = next_newline(start, parser->end - start);
11933
11934 if (newline == NULL) {
11935 newline = parser->end;
11936 } else if (newline[-1] == '\r') {
11937 newline--; // Remove \r
11938 }
11939
11940 // Start of a possible terminator.
11941 const uint8_t *terminator_start = newline - ident_length;
11942
11943 // Cursor to check for the leading whitespace. We skip the
11944 // leading whitespace if we have a - or ~ heredoc.
11945 const uint8_t *cursor = start;
11946
11947 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11948 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11949 cursor++;
11950 }
11951 }
11952
11953 if (
11954 cursor == terminator_start &&
11955 (memcmp(terminator_start, ident_start, ident_length) == 0)
11956 ) {
11957 parser->current.end = breakpoint + 1;
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 }
11961 }
11962
11963 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11964
11965 // If we have hit a newline that is followed by a valid
11966 // terminator, then we need to return the content of the
11967 // heredoc here as string content. Then, the next time a
11968 // token is lexed, it will match again and return the
11969 // end of the heredoc.
11970 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11971 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11972 *lex_mode->as.heredoc.common_whitespace = whitespace;
11973 }
11974
11975 parser->current.end = breakpoint + 1;
11976 pm_token_buffer_flush(parser, &token_buffer);
11977 LEX(PM_TOKEN_STRING_CONTENT);
11978 }
11979
11980 // Otherwise we hit a newline and it wasn't followed by
11981 // a terminator, so we can continue parsing.
11982 parser->current.end = breakpoint + 1;
11983 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11984 break;
11985 }
11986 case '\\': {
11987 // If we hit an escape, then we need to skip past
11988 // however many characters the escape takes up. However
11989 // it's important that if \n or \r\n are escaped, we
11990 // stop looping before the newline and not after the
11991 // newline so that we can still potentially find the
11992 // terminator of the heredoc.
11993 parser->current.end = breakpoint + 1;
11994
11995 // If we've hit the end of the file, then break out of
11996 // the loop by setting the breakpoint to NULL.
11997 if (parser->current.end == parser->end) {
11998 breakpoint = NULL;
11999 continue;
12000 }
12001
12002 pm_token_buffer_escape(parser, &token_buffer);
12003 uint8_t peeked = peek(parser);
12004
12005 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12006 switch (peeked) {
12007 case '\r':
12008 parser->current.end++;
12009 if (peek(parser) != '\n') {
12010 pm_token_buffer_push_byte(&token_buffer, '\\');
12011 pm_token_buffer_push_byte(&token_buffer, '\r');
12012 break;
12013 }
12015 case '\n':
12016 pm_token_buffer_push_byte(&token_buffer, '\\');
12017 pm_token_buffer_push_byte(&token_buffer, '\n');
12018 token_buffer.cursor = parser->current.end + 1;
12019 breakpoint = parser->current.end;
12020 continue;
12021 default:
12022 pm_token_buffer_push_byte(&token_buffer, '\\');
12023 pm_token_buffer_push_escaped(&token_buffer, parser);
12024 break;
12025 }
12026 } else {
12027 switch (peeked) {
12028 case '\r':
12029 parser->current.end++;
12030 if (peek(parser) != '\n') {
12031 pm_token_buffer_push_byte(&token_buffer, '\r');
12032 break;
12033 }
12035 case '\n':
12036 // If we are in a tilde here, we should
12037 // break out of the loop and return the
12038 // string content.
12039 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12040 const uint8_t *end = parser->current.end;
12041
12042 if (parser->heredoc_end == NULL) {
12043 pm_newline_list_append(&parser->newline_list, end);
12044 }
12045
12046 // Here we want the buffer to only
12047 // include up to the backslash.
12048 parser->current.end = breakpoint;
12049 pm_token_buffer_flush(parser, &token_buffer);
12050
12051 // Now we can advance the end of the
12052 // token past the newline.
12053 parser->current.end = end + 1;
12054 lex_mode->as.heredoc.line_continuation = true;
12055 LEX(PM_TOKEN_STRING_CONTENT);
12056 }
12057
12058 was_line_continuation = true;
12059 token_buffer.cursor = parser->current.end + 1;
12060 breakpoint = parser->current.end;
12061 continue;
12062 default:
12063 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12064 break;
12065 }
12066 }
12067
12068 token_buffer.cursor = parser->current.end;
12069 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12070 break;
12071 }
12072 case '#': {
12073 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12074
12075 if (type == PM_TOKEN_NOT_PROVIDED) {
12076 // If we haven't returned at this point then we had
12077 // something that looked like an interpolated class
12078 // or instance variable like "#@" but wasn't
12079 // actually. In this case we'll just skip to the
12080 // next breakpoint.
12081 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12082 break;
12083 }
12084
12085 if (type == PM_TOKEN_STRING_CONTENT) {
12086 pm_token_buffer_flush(parser, &token_buffer);
12087 }
12088
12089 LEX(type);
12090 }
12091 default:
12092 assert(false && "unreachable");
12093 }
12094
12095 was_line_continuation = false;
12096 }
12097
12098 if (parser->current.end > parser->current.start) {
12099 parser->current.end = parser->end;
12100 pm_token_buffer_flush(parser, &token_buffer);
12101 LEX(PM_TOKEN_STRING_CONTENT);
12102 }
12103
12104 // If we've hit the end of the string, then this is an unterminated
12105 // heredoc. In that case we'll return a string content token.
12106 parser->current.end = parser->end;
12107 pm_token_buffer_flush(parser, &token_buffer);
12108 LEX(PM_TOKEN_STRING_CONTENT);
12109 }
12110 }
12111
12112 assert(false && "unreachable");
12113}
12114
12115#undef LEX
12116
12117/******************************************************************************/
12118/* Parse functions */
12119/******************************************************************************/
12120
/**
 * The precedence levels used when parsing expressions. A value of this type is
 * passed to parse_expression as the minimum binding power, and a pair of them
 * (left and right) is stored per token in the pm_binding_powers table.
 *
 * Levels are listed from loosest to tightest binding. Every level is an even
 * number, which leaves the odd value in between free; the associativity macros
 * below rely on this when they compute `precedence + 1`.
 */
typedef enum {
    /* Sentinel: this token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET            =  0,

    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4, /* rescue */
    PM_BINDING_POWER_MODIFIER         =  6, /* if unless until while */
    PM_BINDING_POWER_COMPOSITION      =  8, /* and or */
    PM_BINDING_POWER_NOT              = 10, /* not */
    PM_BINDING_POWER_MATCH            = 12, /* => in */
    PM_BINDING_POWER_DEFINED          = 14, /* defined? */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, /* = */
    PM_BINDING_POWER_ASSIGNMENT       = 18, /* = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_TERNARY          = 20, /* ?: */
    PM_BINDING_POWER_RANGE            = 22, /* .. ... */
    PM_BINDING_POWER_LOGICAL_OR       = 24, /* || */
    PM_BINDING_POWER_LOGICAL_AND      = 26, /* && */
    PM_BINDING_POWER_EQUALITY         = 28, /* <=> == === != =~ !~ */
    PM_BINDING_POWER_COMPARISON       = 30, /* > >= < <= */
    PM_BINDING_POWER_BITWISE_OR       = 32, /* | ^ */
    PM_BINDING_POWER_BITWISE_AND      = 34, /* & */
    PM_BINDING_POWER_SHIFT            = 36, /* << >> */
    PM_BINDING_POWER_TERM             = 38, /* + - */
    PM_BINDING_POWER_FACTOR           = 40, /* * / % */
    PM_BINDING_POWER_UMINUS           = 42, /* -@ */
    PM_BINDING_POWER_EXPONENT         = 44, /* ** */
    PM_BINDING_POWER_UNARY            = 46, /* ! ~ +@ */
    PM_BINDING_POWER_INDEX            = 48, /* [] []= */
    PM_BINDING_POWER_CALL             = 50, /* :: . */

    /* Upper bound: higher than every real operator level above. */
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
12158
12163typedef struct {
12165 pm_binding_power_t left;
12166
12168 pm_binding_power_t right;
12169
12172
12179
12180#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
12181#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
12182#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
12183#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
12184#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
12185
12186pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12187 // rescue
12188 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12189
12190 // if unless until while
12191 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12192 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12193 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12194 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12195
12196 // and or
12197 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12198 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12199
12200 // => in
12201 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12202 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12203
12204 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12205 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12206 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12207 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12208 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12209 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12210 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12211 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12212 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12213 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12214 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12215 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12216 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12217 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12218 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12219
12220 // ?:
12221 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12222
12223 // .. ...
12224 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12225 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12226 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12227 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12228
12229 // ||
12230 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12231
12232 // &&
12233 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12234
12235 // != !~ == === =~ <=>
12236 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12237 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12238 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12239 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12240 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12241 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12242
12243 // > >= < <=
12244 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12245 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12246 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12247 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12248
12249 // ^ |
12250 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12251 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12252
12253 // &
12254 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12255
12256 // >> <<
12257 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12258 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12259
12260 // - +
12261 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12262 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12263
12264 // % / *
12265 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12266 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12267 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12268 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12269
12270 // -@
12271 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12272 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12273
12274 // **
12275 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12276 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12277
12278 // ! ~ +@
12279 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12280 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12281 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12282
12283 // [
12284 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12285
12286 // :: . &.
12287 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12288 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12289 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12290};
12291
12292#undef BINDING_POWER_ASSIGNMENT
12293#undef LEFT_ASSOCIATIVE
12294#undef RIGHT_ASSOCIATIVE
12295#undef RIGHT_ASSOCIATIVE_UNARY
12296
12300static inline bool
12301match1(const pm_parser_t *parser, pm_token_type_t type) {
12302 return parser->current.type == type;
12303}
12304
12308static inline bool
12309match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12310 return match1(parser, type1) || match1(parser, type2);
12311}
12312
12316static inline bool
12317match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12318 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12319}
12320
12324static inline bool
12325match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12326 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12327}
12328
12332static inline bool
12333match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12334 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12335}
12336
12340static inline bool
12341match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12342 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12343}
12344
12351static bool
12352accept1(pm_parser_t *parser, pm_token_type_t type) {
12353 if (match1(parser, type)) {
12354 parser_lex(parser);
12355 return true;
12356 }
12357 return false;
12358}
12359
12364static inline bool
12365accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12366 if (match2(parser, type1, type2)) {
12367 parser_lex(parser);
12368 return true;
12369 }
12370 return false;
12371}
12372
12384static void
12385expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12386 if (accept1(parser, type)) return;
12387
12388 const uint8_t *location = parser->previous.end;
12389 pm_parser_err(parser, location, location, diag_id);
12390
12391 parser->previous.start = location;
12392 parser->previous.type = PM_TOKEN_MISSING;
12393}
12394
12399static void
12400expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12401 if (accept2(parser, type1, type2)) return;
12402
12403 const uint8_t *location = parser->previous.end;
12404 pm_parser_err(parser, location, location, diag_id);
12405
12406 parser->previous.start = location;
12407 parser->previous.type = PM_TOKEN_MISSING;
12408}
12409
12414static void
12415expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12416 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12417 parser_lex(parser);
12418 } else {
12419 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12420 parser->previous.start = parser->previous.end;
12421 parser->previous.type = PM_TOKEN_MISSING;
12422 }
12423}
12424
12431static void
12432expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12433 if (accept1(parser, type)) return;
12434
12435 pm_parser_err(parser, opening->start, opening->end, diag_id);
12436
12437 parser->previous.start = opening->end;
12438 parser->previous.type = PM_TOKEN_MISSING;
12439}
12440
12441static pm_node_t *
12442parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12443
12448static pm_node_t *
12449parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12450 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12451 pm_assert_value_expression(parser, node);
12452 return node;
12453}
12454
12473static inline bool
12474token_begins_expression_p(pm_token_type_t type) {
12475 switch (type) {
12476 case PM_TOKEN_EQUAL_GREATER:
12477 case PM_TOKEN_KEYWORD_IN:
12478 // We need to special case this because it is a binary operator that
12479 // should not be marked as beginning an expression.
12480 return false;
12481 case PM_TOKEN_BRACE_RIGHT:
12482 case PM_TOKEN_BRACKET_RIGHT:
12483 case PM_TOKEN_COLON:
12484 case PM_TOKEN_COMMA:
12485 case PM_TOKEN_EMBEXPR_END:
12486 case PM_TOKEN_EOF:
12487 case PM_TOKEN_LAMBDA_BEGIN:
12488 case PM_TOKEN_KEYWORD_DO:
12489 case PM_TOKEN_KEYWORD_DO_LOOP:
12490 case PM_TOKEN_KEYWORD_END:
12491 case PM_TOKEN_KEYWORD_ELSE:
12492 case PM_TOKEN_KEYWORD_ELSIF:
12493 case PM_TOKEN_KEYWORD_ENSURE:
12494 case PM_TOKEN_KEYWORD_THEN:
12495 case PM_TOKEN_KEYWORD_RESCUE:
12496 case PM_TOKEN_KEYWORD_WHEN:
12497 case PM_TOKEN_NEWLINE:
12498 case PM_TOKEN_PARENTHESIS_RIGHT:
12499 case PM_TOKEN_SEMICOLON:
12500 // The reason we need this short-circuit is because we're using the
12501 // binding powers table to tell us if the subsequent token could
12502 // potentially be the start of an expression. If there _is_ a binding
12503 // power for one of these tokens, then we should remove it from this list
12504 // and let it be handled by the default case below.
12505 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12506 return false;
12507 case PM_TOKEN_UAMPERSAND:
12508 // This is a special case because this unary operator cannot appear
12509 // as a general operator, it only appears in certain circumstances.
12510 return false;
12511 case PM_TOKEN_UCOLON_COLON:
12512 case PM_TOKEN_UMINUS:
12513 case PM_TOKEN_UMINUS_NUM:
12514 case PM_TOKEN_UPLUS:
12515 case PM_TOKEN_BANG:
12516 case PM_TOKEN_TILDE:
12517 case PM_TOKEN_UDOT_DOT:
12518 case PM_TOKEN_UDOT_DOT_DOT:
12519 // These unary tokens actually do have binding power associated with them
12520 // so that we can correctly place them into the precedence order. But we
12521 // want them to be marked as beginning an expression, so we need to
12522 // special case them here.
12523 return true;
12524 default:
12525 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12526 }
12527}
12528
12533static pm_node_t *
12534parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12535 if (accept1(parser, PM_TOKEN_USTAR)) {
12536 pm_token_t operator = parser->previous;
12537 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12538 return UP(pm_splat_node_create(parser, &operator, expression));
12539 }
12540
12541 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12542}
12543
12544static bool
12545pm_node_unreference_each(const pm_node_t *node, void *data) {
12546 switch (PM_NODE_TYPE(node)) {
12547 /* When we are about to destroy a set of nodes that could potentially
12548 * contain block exits for the current scope, we need to check if they
12549 * are contained in the list of block exits and remove them if they are.
12550 */
12551 case PM_BREAK_NODE:
12552 case PM_NEXT_NODE:
12553 case PM_REDO_NODE: {
12554 pm_parser_t *parser = (pm_parser_t *) data;
12555 size_t index = 0;
12556
12557 while (index < parser->current_block_exits->size) {
12558 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12559
12560 if (block_exit == node) {
12561 if (index + 1 < parser->current_block_exits->size) {
12562 memmove(
12563 &parser->current_block_exits->nodes[index],
12564 &parser->current_block_exits->nodes[index + 1],
12565 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12566 );
12567 }
12568 parser->current_block_exits->size--;
12569
12570 /* Note returning true here because these nodes could have
12571 * arguments that are themselves block exits. */
12572 return true;
12573 }
12574
12575 index++;
12576 }
12577
12578 return true;
12579 }
12580 /* When an implicit local variable is written to or targeted, it becomes
12581 * a regular, named local variable. This branch removes it from the list
12582 * of implicit parameters when that happens. */
12583 case PM_LOCAL_VARIABLE_READ_NODE:
12584 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12585 pm_parser_t *parser = (pm_parser_t *) data;
12586 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12587
12588 for (size_t index = 0; index < implicit_parameters->size; index++) {
12589 if (implicit_parameters->nodes[index] == node) {
12590 /* If the node is not the last one in the list, we need to
12591 * shift the remaining nodes down to fill the gap. This is
12592 * extremely unlikely to happen. */
12593 if (index != implicit_parameters->size - 1) {
12594 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12595 }
12596
12597 implicit_parameters->size--;
12598 break;
12599 }
12600 }
12601
12602 return false;
12603 }
12604 default:
12605 return true;
12606 }
12607}
12608
12614static void
12615pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12616 pm_visit_node(node, pm_node_unreference_each, parser);
12617}
12618
12623static void
12624parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12625 // The method name needs to change. If we previously had
12626 // foo, we now need foo=. In this case we'll allocate a new
12627 // owned string, copy the previous method name in, and
12628 // append an =.
12629 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12630 size_t length = constant->length;
12631 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12632 if (name == NULL) return;
12633
12634 memcpy(name, constant->start, length);
12635 name[length] = '=';
12636
12637 // Now switch the name to the new string.
12638 // This silences clang analyzer warning about leak of memory pointed by `name`.
12639 // NOLINTNEXTLINE(clang-analyzer-*)
12640 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12641}
12642
12649static pm_node_t *
12650parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12651 switch (PM_NODE_TYPE(target)) {
12652 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12653 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12654 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12655 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12656 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12657 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12658 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12659 default: break;
12660 }
12661
12662 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
12663 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12664
12665 pm_node_destroy(parser, target);
12666 return UP(result);
12667}
12668
12677static pm_node_t *
12678parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12679 switch (PM_NODE_TYPE(target)) {
12680 case PM_MISSING_NODE:
12681 return target;
12682 case PM_SOURCE_ENCODING_NODE:
12683 case PM_FALSE_NODE:
12684 case PM_SOURCE_FILE_NODE:
12685 case PM_SOURCE_LINE_NODE:
12686 case PM_NIL_NODE:
12687 case PM_SELF_NODE:
12688 case PM_TRUE_NODE: {
12689 // In these special cases, we have specific error messages and we
12690 // will replace them with local variable writes.
12691 return parse_unwriteable_target(parser, target);
12692 }
12693 case PM_CLASS_VARIABLE_READ_NODE:
12695 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12696 return target;
12697 case PM_CONSTANT_PATH_NODE:
12698 if (context_def_p(parser)) {
12699 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12700 }
12701
12703 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12704
12705 return target;
12706 case PM_CONSTANT_READ_NODE:
12707 if (context_def_p(parser)) {
12708 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12709 }
12710
12711 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12712 target->type = PM_CONSTANT_TARGET_NODE;
12713
12714 return target;
12715 case PM_BACK_REFERENCE_READ_NODE:
12716 case PM_NUMBERED_REFERENCE_READ_NODE:
12717 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12718 return target;
12719 case PM_GLOBAL_VARIABLE_READ_NODE:
12721 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12722 return target;
12723 case PM_LOCAL_VARIABLE_READ_NODE: {
12724 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12725 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
12726 pm_node_unreference(parser, target);
12727 }
12728
12729 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12730 uint32_t name = cast->name;
12731 uint32_t depth = cast->depth;
12732 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12733
12735 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12736
12737 return target;
12738 }
12739 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12740 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12741 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12742
12743 pm_node_unreference(parser, target);
12744 pm_node_destroy(parser, target);
12745
12746 return node;
12747 }
12748 case PM_INSTANCE_VARIABLE_READ_NODE:
12750 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12751 return target;
12752 case PM_MULTI_TARGET_NODE:
12753 if (splat_parent) {
12754 // Multi target is not accepted in all positions. If this is one
12755 // of them, then we need to add an error.
12756 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12757 }
12758
12759 return target;
12760 case PM_SPLAT_NODE: {
12761 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12762
12763 if (splat->expression != NULL) {
12764 splat->expression = parse_target(parser, splat->expression, multiple, true);
12765 }
12766
12767 return UP(splat);
12768 }
12769 case PM_CALL_NODE: {
12770 pm_call_node_t *call = (pm_call_node_t *) target;
12771
12772 // If we have no arguments to the call node and we need this to be a
12773 // target then this is either a method call or a local variable
12774 // write.
12775 if (
12776 (call->message_loc.start != NULL) &&
12777 (call->message_loc.end[-1] != '!') &&
12778 (call->message_loc.end[-1] != '?') &&
12779 (call->opening_loc.start == NULL) &&
12780 (call->arguments == NULL) &&
12781 (call->block == NULL)
12782 ) {
12783 if (call->receiver == NULL) {
12784 // When we get here, we have a local variable write, because it
12785 // was previously marked as a method call but now we have an =.
12786 // This looks like:
12787 //
12788 // foo = 1
12789 //
12790 // When it was parsed in the prefix position, foo was seen as a
12791 // method call with no receiver and no arguments. Now we have an
12792 // =, so we know it's a local variable write.
12793 const pm_location_t message_loc = call->message_loc;
12794
12795 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
12796 pm_node_destroy(parser, target);
12797
12798 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12799 }
12800
12801 if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
12802 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12803 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12804 }
12805
12806 parse_write_name(parser, &call->name);
12807 return UP(pm_call_target_node_create(parser, call));
12808 }
12809 }
12810
12811 // If there is no call operator and the message is "[]" then this is
12812 // an aref expression, and we can transform it into an aset
12813 // expression.
12814 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12815 return UP(pm_index_target_node_create(parser, call));
12816 }
12817 }
12819 default:
12820 // In this case we have a node that we don't know how to convert
12821 // into a target. We need to treat it as an error. For now, we'll
12822 // mark it as an error and just skip right past it.
12823 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12824 return target;
12825 }
12826}
12827
12832static pm_node_t *
12833parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12834 pm_node_t *result = parse_target(parser, target, multiple, false);
12835
12836 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12837 // parens after the targets.
12838 if (
12839 !match1(parser, PM_TOKEN_EQUAL) &&
12840 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12841 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12842 ) {
12843 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12844 }
12845
12846 return result;
12847}
12848
12853static pm_node_t *
12854parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12855 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12856
12857 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12858 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12859 }
12860
12861 return write;
12862}
12863
12867static pm_node_t *
12868parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12869 switch (PM_NODE_TYPE(target)) {
12870 case PM_MISSING_NODE:
12871 pm_node_destroy(parser, value);
12872 return target;
12873 case PM_CLASS_VARIABLE_READ_NODE: {
12874 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12875 pm_node_destroy(parser, target);
12876 return UP(node);
12877 }
12878 case PM_CONSTANT_PATH_NODE: {
12879 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12880
12881 if (context_def_p(parser)) {
12882 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12883 }
12884
12885 return parse_shareable_constant_write(parser, node);
12886 }
12887 case PM_CONSTANT_READ_NODE: {
12888 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12889
12890 if (context_def_p(parser)) {
12891 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12892 }
12893
12894 pm_node_destroy(parser, target);
12895 return parse_shareable_constant_write(parser, node);
12896 }
12897 case PM_BACK_REFERENCE_READ_NODE:
12898 case PM_NUMBERED_REFERENCE_READ_NODE:
12899 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12901 case PM_GLOBAL_VARIABLE_READ_NODE: {
12902 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12903 pm_node_destroy(parser, target);
12904 return UP(node);
12905 }
12906 case PM_LOCAL_VARIABLE_READ_NODE: {
12908
12909 pm_constant_id_t name = local_read->name;
12910 pm_location_t name_loc = target->location;
12911
12912 uint32_t depth = local_read->depth;
12913 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12914
12915 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12916 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12917 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
12918 pm_node_unreference(parser, target);
12919 }
12920
12921 pm_locals_unread(&scope->locals, name);
12922 pm_node_destroy(parser, target);
12923
12924 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
12925 }
12926 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12927 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12928 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12929
12930 pm_node_unreference(parser, target);
12931 pm_node_destroy(parser, target);
12932
12933 return node;
12934 }
12935 case PM_INSTANCE_VARIABLE_READ_NODE: {
12936 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12937 pm_node_destroy(parser, target);
12938 return write_node;
12939 }
12940 case PM_MULTI_TARGET_NODE:
12941 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12942 case PM_SPLAT_NODE: {
12943 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12944
12945 if (splat->expression != NULL) {
12946 splat->expression = parse_write(parser, splat->expression, operator, value);
12947 }
12948
12949 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12950 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12951
12952 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12953 }
12954 case PM_CALL_NODE: {
12955 pm_call_node_t *call = (pm_call_node_t *) target;
12956
12957 // If we have no arguments to the call node and we need this to be a
12958 // target then this is either a method call or a local variable
12959 // write.
12960 if (
12961 (call->message_loc.start != NULL) &&
12962 (call->message_loc.end[-1] != '!') &&
12963 (call->message_loc.end[-1] != '?') &&
12964 (call->opening_loc.start == NULL) &&
12965 (call->arguments == NULL) &&
12966 (call->block == NULL)
12967 ) {
12968 if (call->receiver == NULL) {
12969 // When we get here, we have a local variable write, because it
12970 // was previously marked as a method call but now we have an =.
12971 // This looks like:
12972 //
12973 // foo = 1
12974 //
12975 // When it was parsed in the prefix position, foo was seen as a
12976 // method call with no receiver and no arguments. Now we have an
12977 // =, so we know it's a local variable write.
12978 const pm_location_t message = call->message_loc;
12979
12980 pm_parser_local_add_location(parser, message.start, message.end, 0);
12981 pm_node_destroy(parser, target);
12982
12983 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
12984 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
12985
12986 pm_refute_numbered_parameter(parser, message.start, message.end);
12987 return target;
12988 }
12989
12990 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
12991 // When we get here, we have a method call, because it was
12992 // previously marked as a method call but now we have an =. This
12993 // looks like:
12994 //
12995 // foo.bar = 1
12996 //
12997 // When it was parsed in the prefix position, foo.bar was seen as a
12998 // method call with no arguments. Now we have an =, so we know it's
12999 // a method call with an argument. In this case we will create the
13000 // arguments node, parse the argument, and add it to the list.
13001 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13002 call->arguments = arguments;
13003
13004 pm_arguments_node_arguments_append(arguments, value);
13005 call->base.location.end = arguments->base.location.end;
13006 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13007
13008 parse_write_name(parser, &call->name);
13009 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13010
13011 return UP(call);
13012 }
13013 }
13014
13015 // If there is no call operator and the message is "[]" then this is
13016 // an aref expression, and we can transform it into an aset
13017 // expression.
13018 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13019 if (call->arguments == NULL) {
13020 call->arguments = pm_arguments_node_create(parser);
13021 }
13022
13023 pm_arguments_node_arguments_append(call->arguments, value);
13024 target->location.end = value->location.end;
13025
13026 // Replace the name with "[]=".
13027 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13028 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13029
13030 // Ensure that the arguments for []= don't contain keywords
13031 pm_index_arguments_check(parser, call->arguments, call->block);
13032 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13033
13034 return target;
13035 }
13036
13037 // If there are arguments on the call node, then it can't be a
13038 // method call ending with = or a local variable write, so it must
13039 // be a syntax error. In this case we'll fall through to our default
13040 // handling. We need to free the value that we parsed because there
13041 // is no way for us to attach it to the tree at this point.
13042 //
13043 // Since it is possible for the value to contain an implicit
13044 // parameter somewhere in its subtree, we need to walk it and remove
13045 // any implicit parameters from the list of implicit parameters for
13046 // the current scope.
13047 pm_node_unreference(parser, value);
13048 pm_node_destroy(parser, value);
13049 }
13051 default:
13052 // In this case we have a node that we don't know how to convert into a
13053 // target. We need to treat it as an error. For now, we'll mark it as an
13054 // error and just skip right past it.
13055 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13056 return target;
13057 }
13058}
13059
13066static pm_node_t *
13067parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13068 switch (PM_NODE_TYPE(target)) {
13069 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13070 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13071 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13072 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13073 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13074 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13075 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13076 default: break;
13077 }
13078
13079 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13080 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13081
13082 pm_node_destroy(parser, target);
13083 return UP(result);
13084}
13085
13096static pm_node_t *
13097parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13098 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13099
13100 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13101 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13102
13103 while (accept1(parser, PM_TOKEN_COMMA)) {
13104 if (accept1(parser, PM_TOKEN_USTAR)) {
13105 // Here we have a splat operator. It can have a name or be
13106 // anonymous. It can be the final target or be in the middle if
13107 // there haven't been any others yet.
13108 if (has_rest) {
13109 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13110 }
13111
13112 pm_token_t star_operator = parser->previous;
13113 pm_node_t *name = NULL;
13114
13115 if (token_begins_expression_p(parser->current.type)) {
13116 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13117 name = parse_target(parser, name, true, true);
13118 }
13119
13120 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13121 pm_multi_target_node_targets_append(parser, result, splat);
13122 has_rest = true;
13123 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13124 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13125 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13126 target = parse_target(parser, target, true, false);
13127
13128 pm_multi_target_node_targets_append(parser, result, target);
13129 context_pop(parser);
13130 } else if (token_begins_expression_p(parser->current.type)) {
13131 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13132 target = parse_target(parser, target, true, false);
13133
13134 pm_multi_target_node_targets_append(parser, result, target);
13135 } else if (!match1(parser, PM_TOKEN_EOF)) {
13136 // If we get here, then we have a trailing , in a multi target node.
13137 // We'll add an implicit rest node to represent this.
13138 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13139 pm_multi_target_node_targets_append(parser, result, rest);
13140 break;
13141 }
13142 }
13143
13144 return UP(result);
13145}
13146
13151static pm_node_t *
13152parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13153 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13154 accept1(parser, PM_TOKEN_NEWLINE);
13155
13156 // Ensure that we have either an = or a ) after the targets.
13157 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13158 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13159 }
13160
13161 return result;
13162}
13163
13167static pm_statements_node_t *
13168parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13169 // First, skip past any optional terminators that might be at the beginning
13170 // of the statements.
13171 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13172
13173 // If we have a terminator, then we can just return NULL.
13174 if (context_terminator(context, &parser->current)) return NULL;
13175
13176 pm_statements_node_t *statements = pm_statements_node_create(parser);
13177
13178 // At this point we know we have at least one statement, and that it
13179 // immediately follows the current token.
13180 context_push(parser, context);
13181
13182 while (true) {
13183 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13184 pm_statements_node_body_append(parser, statements, node, true);
13185
13186 // If we're recovering from a syntax error, then we need to stop parsing
13187 // the statements now.
13188 if (parser->recovering) {
13189 // If this is the level of context where the recovery has happened,
13190 // then we can mark the parser as done recovering.
13191 if (context_terminator(context, &parser->current)) parser->recovering = false;
13192 break;
13193 }
13194
13195 // If we have a terminator, then we will parse all consecutive
13196 // terminators and then continue parsing the statements list.
13197 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13198 // If we have a terminator, then we will continue parsing the
13199 // statements list.
13200 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13201 if (context_terminator(context, &parser->current)) break;
13202
13203 // Now we can continue parsing the list of statements.
13204 continue;
13205 }
13206
13207 // At this point we have a list of statements that are not terminated by
13208 // a newline or semicolon. At this point we need to check if we're at
13209 // the end of the statements list. If we are, then we should break out
13210 // of the loop.
13211 if (context_terminator(context, &parser->current)) break;
13212
13213 // At this point, we have a syntax error, because the statement was not
13214 // terminated by a newline or semicolon, and we're not at the end of the
13215 // statements list. Ideally we should scan forward to determine if we
13216 // should insert a missing terminator or break out of parsing the
13217 // statements list at this point.
13218 //
13219 // We don't have that yet, so instead we'll do a more naive approach. If
13220 // we were unable to parse an expression, then we will skip past this
13221 // token and continue parsing the statements list. Otherwise we'll add
13222 // an error and continue parsing the statements list.
13223 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13224 parser_lex(parser);
13225
13226 // If we are at the end of the file, then we need to stop parsing
13227 // the statements entirely at this point. Mark the parser as
13228 // recovering, as we know that EOF closes the top-level context, and
13229 // then break out of the loop.
13230 if (match1(parser, PM_TOKEN_EOF)) {
13231 parser->recovering = true;
13232 break;
13233 }
13234
13235 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13236 if (context_terminator(context, &parser->current)) break;
13237 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13238 // This is an inlined version of accept1 because the error that we
13239 // want to add has varargs. If this happens again, we should
13240 // probably extract a helper function.
13241 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13242 parser->previous.start = parser->previous.end;
13243 parser->previous.type = PM_TOKEN_MISSING;
13244 }
13245 }
13246
13247 context_pop(parser);
13248 bool last_value = true;
13249 switch (context) {
13252 last_value = false;
13253 break;
13254 default:
13255 break;
13256 }
13257 pm_void_statements_check(parser, statements, last_value);
13258
13259 return statements;
13260}
13261
13266static void
13267pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13268 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13269
13270 if (duplicated != NULL) {
13271 pm_buffer_t buffer = { 0 };
13272 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13273
13274 pm_diagnostic_list_append_format(
13275 &parser->warning_list,
13276 duplicated->location.start,
13277 duplicated->location.end,
13278 PM_WARN_DUPLICATED_HASH_KEY,
13279 (int) pm_buffer_length(&buffer),
13280 pm_buffer_value(&buffer),
13281 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13282 );
13283
13284 pm_buffer_free(&buffer);
13285 }
13286}
13287
13292static void
13293pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13294 pm_node_t *previous;
13295
13296 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13297 pm_diagnostic_list_append_format(
13298 &parser->warning_list,
13299 node->location.start,
13300 node->location.end,
13301 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13302 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13303 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13304 );
13305 }
13306}
13307
13311static bool
13312parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13313 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13314 bool contains_keyword_splat = false;
13315
13316 while (true) {
13317 pm_node_t *element;
13318
13319 switch (parser->current.type) {
13320 case PM_TOKEN_USTAR_STAR: {
13321 parser_lex(parser);
13322 pm_token_t operator = parser->previous;
13323 pm_node_t *value = NULL;
13324
13325 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13326 // If we're about to parse a nested hash that is being
13327 // pushed into this hash directly with **, then we want the
13328 // inner hash to share the static literals with the outer
13329 // hash.
13330 parser->current_hash_keys = literals;
13331 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13332 } else if (token_begins_expression_p(parser->current.type)) {
13333 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13334 } else {
13335 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13336 }
13337
13338 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13339 contains_keyword_splat = true;
13340 break;
13341 }
13342 case PM_TOKEN_LABEL: {
13343 pm_token_t label = parser->current;
13344 parser_lex(parser);
13345
13346 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13347 pm_hash_key_static_literals_add(parser, literals, key);
13348
13349 pm_token_t operator = not_provided(parser);
13350 pm_node_t *value = NULL;
13351
13352 if (token_begins_expression_p(parser->current.type)) {
13353 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13354 } else {
13355 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13356 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13357 value = UP(pm_constant_read_node_create(parser, &constant));
13358 } else {
13359 int depth = -1;
13360 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13361
13362 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13363 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13364 } else {
13365 depth = pm_parser_local_depth(parser, &identifier);
13366 }
13367
13368 if (depth == -1) {
13369 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13370 } else {
13371 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13372 }
13373 }
13374
13375 value->location.end++;
13376 value = UP(pm_implicit_node_create(parser, value));
13377 }
13378
13379 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13380 break;
13381 }
13382 default: {
13383 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13384
13385 // Hash keys that are strings are automatically frozen. We will
13386 // mark that here.
13387 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13388 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13389 }
13390
13391 pm_hash_key_static_literals_add(parser, literals, key);
13392
13393 pm_token_t operator;
13394 if (pm_symbol_node_label_p(key)) {
13395 operator = not_provided(parser);
13396 } else {
13397 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13398 operator = parser->previous;
13399 }
13400
13401 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13402 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13403 break;
13404 }
13405 }
13406
13407 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13408 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
13409 } else {
13410 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
13411 }
13412
13413 // If there's no comma after the element, then we're done.
13414 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13415
13416 // If the next element starts with a label or a **, then we know we have
13417 // another element in the hash, so we'll continue parsing.
13418 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13419
13420 // Otherwise we need to check if the subsequent token begins an expression.
13421 // If it does, then we'll continue parsing.
13422 if (token_begins_expression_p(parser->current.type)) continue;
13423
13424 // Otherwise by default we will exit out of this loop.
13425 break;
13426 }
13427
13428 return contains_keyword_splat;
13429}
13430
13431static inline bool
13432argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13433 if (pm_symbol_node_label_p(argument)) {
13434 return true;
13435 }
13436
13437 switch (PM_NODE_TYPE(argument)) {
13438 case PM_CALL_NODE: {
13439 pm_call_node_t *cast = (pm_call_node_t *) argument;
13440 if (cast->opening_loc.start == NULL && cast->arguments != NULL) {
13441 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13442 return false;
13443 }
13444 if (cast->block != NULL) {
13445 return false;
13446 }
13447 }
13448 break;
13449 }
13450 default: break;
13451 }
13452 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13453}
13454
13458static inline void
13459parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13460 if (arguments->arguments == NULL) {
13461 arguments->arguments = pm_arguments_node_create(parser);
13462 }
13463
13464 pm_arguments_node_arguments_append(arguments->arguments, argument);
13465}
13466
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Parsing stops at `terminator`, at EOF, when an argument fails to parse (a
 * missing node or the parser entering recovery), or when no comma follows an
 * argument. Nothing is parsed at all if the current token is the terminator,
 * EOF, a context terminator, or has a left binding power below
 * PM_BINDING_POWER_RANGE.
 *
 * @param parser The parser.
 * @param arguments Receives the positional/keyword arguments (created lazily
 *     through parse_arguments_append), the block argument, and the
 *     has_forwarding flag.
 * @param accepts_forwarding When true, a `...` that is not followed by an
 *     expression is parsed as argument forwarding. When false, `...` is handed
 *     to the generic expression path.
 * @param terminator The token that closes the list. PM_TOKEN_EOF means there
 *     is no specific closing token, in which case newlines are not skipped
 *     between arguments.
 * @param depth Current nesting depth; depth + 1 is passed to nested parses.
 */
13470static void
13471parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
13472 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13473
13474 // First we need to check if the next token is one that could be the start
13475 // of an argument. If it's not, then we can just return.
13476 if (
13477 match2(parser, terminator, PM_TOKEN_EOF) ||
13478 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13479 context_terminator(parser->current_context->context, &parser->current)
13480 ) {
13481 return;
13482 }
13483
// Flags describing what has been seen so far; they drive the ordering errors
// reported below.
13484 bool parsed_first_argument = false;
13485 bool parsed_bare_hash = false;
13486 bool parsed_block_argument = false;
13487 bool parsed_forwarding_arguments = false;
13488
13489 while (!match1(parser, PM_TOKEN_EOF)) {
// Anything that follows a forwarding `...` is reported as an error, but it is
// still parsed.
13490 if (parsed_forwarding_arguments) {
13491 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13492 }
13493
13494 pm_node_t *argument = NULL;
13495
13496 switch (parser->current.type) {
// `**` or a label starts a bare keyword hash; parse_assocs consumes all of its
// elements.
13497 case PM_TOKEN_USTAR_STAR:
13498 case PM_TOKEN_LABEL: {
13499 if (parsed_bare_hash) {
13500 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13501 }
13502
13503 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13504 argument = UP(hash);
13505
13506 pm_static_literals_t hash_keys = { 0 };
13507 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13508
13509 parse_arguments_append(parser, arguments, argument);
13510
13511 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13512 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13513 pm_node_flag_set(UP(arguments->arguments), flags);
13514
13515 pm_static_literals_free(&hash_keys);
13516 parsed_bare_hash = true;
13517
13518 break;
13519 }
// `&expr` block argument, or a bare `&` which is checked as block forwarding.
13520 case PM_TOKEN_UAMPERSAND: {
13521 parser_lex(parser);
13522 pm_token_t operator = parser->previous;
13523 pm_node_t *expression = NULL;
13524
13525 if (token_begins_expression_p(parser->current.type)) {
13526 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13527 } else {
13528 pm_parser_scope_forwarding_block_check(parser, &operator);
13529 }
13530
13531 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
// Only the first block argument is stored in arguments->block; any later one
// is appended to the regular argument list instead.
13532 if (parsed_block_argument) {
13533 parse_arguments_append(parser, arguments, argument);
13534 } else {
13535 arguments->block = argument;
13536 }
13537
13538 if (match1(parser, PM_TOKEN_COMMA)) {
13539 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13540 }
13541
13542 parsed_block_argument = true;
13543 break;
13544 }
// `*expr` splat, or a bare `*` (directly followed by `)`, `,`, `;` or `]`)
// which is checked as positional forwarding.
13545 case PM_TOKEN_USTAR: {
13546 parser_lex(parser);
13547 pm_token_t operator = parser->previous;
13548
13549 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13550 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13551 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13552 if (parsed_bare_hash) {
13553 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13554 }
13555 } else {
13556 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13557
13558 if (parsed_bare_hash) {
13559 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13560 }
13561
13562 argument = UP(pm_splat_node_create(parser, &operator, expression));
13563 }
13564
13565 parse_arguments_append(parser, arguments, argument);
13566 break;
13567 }
13568 case PM_TOKEN_UDOT_DOT_DOT: {
13569 if (accepts_forwarding) {
13570 parser_lex(parser);
13571
13572 if (token_begins_expression_p(parser->current.type)) {
13573 // If the token begins an expression then this ... was
13574 // not actually argument forwarding but was instead a
13575 // range.
13576 pm_token_t operator = parser->previous;
13577 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13578
13579 // If we parse a range, we need to validate that we
13580 // didn't accidentally violate the nonassoc rules of the
13581 // ... operator.
13582 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13583 pm_range_node_t *range = (pm_range_node_t *) right;
13584 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13585 }
13586
13587 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13588 } else {
13589 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13590 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13591 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13592 }
13593
13594 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13595 parse_arguments_append(parser, arguments, argument);
13596 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13597 arguments->has_forwarding = true;
13598 parsed_forwarding_arguments = true;
13599 break;
13600 }
13601 }
13602 }
// Intentional fall through: reached when forwarding is not accepted (argument
// is still NULL and `...` is parsed as an ordinary expression below) or when
// the `...` turned out to be a beginless range (argument already holds it).
// fallthrough
13604 default: {
13605 if (argument == NULL) {
13606 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13607 }
13608
13609 bool contains_keywords = false;
13610 bool contains_keyword_splat = false;
13611
// The expression just parsed may be the first key of a bare hash
// (`key => value, ...`). Note that argument_allowed_for_bare_hash consumes the
// `=>` when it is present.
13612 if (argument_allowed_for_bare_hash(parser, argument)){
13613 if (parsed_bare_hash) {
13614 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13615 }
13616
13617 pm_token_t operator;
13618 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13619 operator = parser->previous;
13620 } else {
13621 operator = not_provided(parser);
13622 }
13623
13624 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13625 contains_keywords = true;
13626
13627 // Create the set of static literals for this hash.
13628 pm_static_literals_t hash_keys = { 0 };
13629 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13630
13631 // Finish parsing the one we are part way through.
13632 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13633 argument = UP(pm_assoc_node_create(parser, argument, &operator, value));
13634
13635 pm_keyword_hash_node_elements_append(bare_hash, argument);
13636 argument = UP(bare_hash);
13637
13638 // Then parse more if we have a comma
13639 if (accept1(parser, PM_TOKEN_COMMA) && (
13640 token_begins_expression_p(parser->current.type) ||
13641 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13642 )) {
13643 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13644 }
13645
13646 pm_static_literals_free(&hash_keys);
13647 parsed_bare_hash = true;
13648 }
13649
13650 parse_arguments_append(parser, arguments, argument);
13651
13652 pm_node_flags_t flags = 0;
13653 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13654 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13655 pm_node_flag_set(UP(arguments->arguments), flags);
13656
13657 break;
13658 }
13659 }
13660
13661 parsed_first_argument = true;
13662
13663 // If parsing the argument failed, we need to stop parsing arguments.
13664 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13665
13666 // If the terminator of these arguments is not EOF, then we have a
13667 // specific token we're looking for. In that case we can accept a
13668 // newline here because it is not functioning as a statement terminator.
13669 bool accepted_newline = false;
13670 if (terminator != PM_TOKEN_EOF) {
13671 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13672 }
13673
13674 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13675 // If we previously were on a comma and we just parsed a bare hash,
13676 // then we want to continue parsing arguments. This is because the
13677 // comma was grabbed up by the hash parser.
13678 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13679 // If there was a comma, then we need to check if we also accepted a
13680 // newline. If we did, then this is a syntax error.
13681 if (accepted_newline) {
13682 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13683 }
13684
13685 // If this is a command call and an argument takes a block,
13686 // there can be no further arguments. For example,
13687 // `foo(bar 1 do end, 2)` should be rejected.
13688 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13689 pm_call_node_t *call = (pm_call_node_t *) argument;
13690 if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
13691 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13692 break;
13693 }
13694 }
13695 } else {
13696 // If there is no comma at the end of the argument list then we're
13697 // done parsing arguments and can break out of this loop.
13698 break;
13699 }
13700
13701 // If we hit the terminator, then that means we have a trailing comma so
13702 // we can accept that output as well.
13703 if (match1(parser, terminator)) break;
13704 }
13705}
13706
13718parse_required_destructured_parameter(pm_parser_t *parser) {
13719 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13720
13721 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13722 pm_multi_target_node_opening_set(node, &parser->previous);
13723
13724 do {
13725 pm_node_t *param;
13726
13727 // If we get here then we have a trailing comma, which isn't allowed in
13728 // the grammar. In other places, multi targets _do_ allow trailing
13729 // commas, so here we'll assume this is a mistake of the user not
13730 // knowing it's not allowed here.
13731 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13732 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13733 pm_multi_target_node_targets_append(parser, node, param);
13734 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13735 break;
13736 }
13737
13738 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13739 param = UP(parse_required_destructured_parameter(parser));
13740 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13741 pm_token_t star = parser->previous;
13742 pm_node_t *value = NULL;
13743
13744 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13745 pm_token_t name = parser->previous;
13746 value = UP(pm_required_parameter_node_create(parser, &name));
13747 if (pm_parser_parameter_name_check(parser, &name)) {
13748 pm_node_flag_set_repeated_parameter(value);
13749 }
13750 pm_parser_local_add_token(parser, &name, 1);
13751 }
13752
13753 param = UP(pm_splat_node_create(parser, &star, value));
13754 } else {
13755 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13756 pm_token_t name = parser->previous;
13757
13758 param = UP(pm_required_parameter_node_create(parser, &name));
13759 if (pm_parser_parameter_name_check(parser, &name)) {
13760 pm_node_flag_set_repeated_parameter(param);
13761 }
13762 pm_parser_local_add_token(parser, &name, 1);
13763 }
13764
13765 pm_multi_target_node_targets_append(parser, node, param);
13766 } while (accept1(parser, PM_TOKEN_COMMA));
13767
13768 accept1(parser, PM_TOKEN_NEWLINE);
13769 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13770 pm_multi_target_node_closing_set(node, &parser->previous);
13771
13772 return node;
13773}
13774
/**
 * Ordering states used to validate the sequence of parameters in a parameter
 * list. The numeric order matters: update_parameter_state compares these
 * values to decide whether a transition is allowed, rejecting a move to a
 * greater value and lowering the current state on a move to a smaller one.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13790
13794static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13795 [0] = PM_PARAMETERS_NO_CHANGE,
13796 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13797 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13798 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13799 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13800 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13801 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13802 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13803 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13804 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13805 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13806 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13807};
13808
13816static bool
13817update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13818 pm_parameters_order_t state = parameters_ordering[token->type];
13819 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13820
13821 // If we see another ordered argument after a optional argument
13822 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13823 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13824 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13825 return true;
13826 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13827 return true;
13828 }
13829
13830 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13831 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13832 return false;
13833 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13834 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13835 return false;
13836 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13837 // We know what transition we failed on, so we can provide a better error here.
13838 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13839 return false;
13840 }
13841
13842 if (state < *current) *current = state;
13843 return true;
13844}
13845
/**
 * Parse a list of parameters into a parameters node.
 *
 * Parameters are read one at a time until the current token cannot start a
 * parameter, a parameter is not followed by a comma, or parsing a default
 * value put the parser into recovery. update_parameter_state is consulted for
 * each parameter to report ordering errors and to track which section of the
 * list we are in.
 *
 * @param parser The parser.
 * @param binding_power The binding power used when parsing default values.
 * @param uses_parentheses When true, a newline is accepted after a parameter,
 *     and a label directly followed by a newline or semicolon ends the list
 *     instead of producing a required keyword parameter.
 * @param allows_trailing_comma When true, a trailing comma while still in the
 *     named/none ordering state produces an implicit rest node; in every other
 *     case a trailing comma is reported as an error.
 * @param allows_forwarding_parameters When false, `...` reports
 *     PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES (the parameter is still parsed).
 * @param accepts_blocks_in_defaults When true, `true` is pushed onto the
 *     accepts-block stack while a default value is being parsed.
 * @param in_block Together with !uses_parentheses, decides whether
 *     parser->in_keyword_arg is set while a label parameter is parsed.
 * @param depth Current nesting depth; depth + 1 is passed to nested parses.
 * @return The parameters node, or NULL when no parameters were parsed.
 */
13849static pm_parameters_node_t *
13850parse_parameters(
13851 pm_parser_t *parser,
13852 pm_binding_power_t binding_power,
13853 bool uses_parentheses,
13854 bool allows_trailing_comma,
13855 bool allows_forwarding_parameters,
13856 bool accepts_blocks_in_defaults,
13857 bool in_block,
13858 uint16_t depth
13859) {
// Paired with the pm_do_loop_stack_pop call after the loop.
13860 pm_do_loop_stack_push(parser, false);
13861
13862 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13863 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13864
13865 while (true) {
// Cleared by any case that decides the parameter list is finished.
13866 bool parsing = true;
13867
13868 switch (parser->current.type) {
// A destructured parameter such as `(a, b)`. It is a required parameter while
// the ordering state is still above AFTER_OPTIONAL, and a post parameter
// otherwise.
13869 case PM_TOKEN_PARENTHESIS_LEFT: {
13870 update_parameter_state(parser, &parser->current, &order);
13871 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13872
13873 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13874 pm_parameters_node_requireds_append(params, param);
13875 } else {
13876 pm_parameters_node_posts_append(params, param);
13877 }
13878 break;
13879 }
// A block parameter: `&name`, or an anonymous `&` which marks the scope as
// forwarding its block.
13880 case PM_TOKEN_UAMPERSAND:
13881 case PM_TOKEN_AMPERSAND: {
13882 update_parameter_state(parser, &parser->current, &order);
13883 parser_lex(parser);
13884
13885 pm_token_t operator = parser->previous;
13886 pm_token_t name;
13887
13888 bool repeated = false;
13889 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13890 name = parser->previous;
13891 repeated = pm_parser_parameter_name_check(parser, &name);
13892 pm_parser_local_add_token(parser, &name, 1);
13893 } else {
13894 name = not_provided(parser);
13895 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13896 }
13897
13898 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
13899 if (repeated) {
13900 pm_node_flag_set_repeated_parameter(UP(param));
13901 }
// A second block parameter is reported as an error and appended to posts.
13902 if (params->block == NULL) {
13903 pm_parameters_node_block_set(params, param);
13904 } else {
13905 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
13906 pm_parameters_node_posts_append(params, UP(param));
13907 }
13908
13909 break;
13910 }
// The `...` forwarding parameter; it is stored in the keyword_rest slot.
13911 case PM_TOKEN_UDOT_DOT_DOT: {
13912 if (!allows_forwarding_parameters) {
13913 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
13914 }
13915
13916 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13917 parser_lex(parser);
13918
13919 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13920 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13921
13922 if (params->keyword_rest != NULL) {
13923 // If we already have a keyword rest parameter, then we replace it with the
13924 // forwarding parameter and move the keyword rest parameter to the posts list.
13925 pm_node_t *keyword_rest = params->keyword_rest;
13926 pm_parameters_node_posts_append(params, keyword_rest);
// Only report here if update_parameter_state did not already report an error
// for this token.
13927 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13928 params->keyword_rest = NULL;
13929 }
13930
13931 pm_parameters_node_keyword_rest_set(params, UP(param));
13932 break;
13933 }
// A positional parameter name. Only an identifier is valid; each of the other
// token types is still consumed as the name but reports its own error.
13934 case PM_TOKEN_CLASS_VARIABLE:
13935 case PM_TOKEN_IDENTIFIER:
13936 case PM_TOKEN_CONSTANT:
13937 case PM_TOKEN_INSTANCE_VARIABLE:
13938 case PM_TOKEN_GLOBAL_VARIABLE:
13939 case PM_TOKEN_METHOD_NAME: {
13940 parser_lex(parser);
13941 switch (parser->previous.type) {
13942 case PM_TOKEN_CONSTANT:
13943 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13944 break;
13945 case PM_TOKEN_INSTANCE_VARIABLE:
13946 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13947 break;
13948 case PM_TOKEN_GLOBAL_VARIABLE:
13949 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13950 break;
13951 case PM_TOKEN_CLASS_VARIABLE:
13952 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13953 break;
13954 case PM_TOKEN_METHOD_NAME:
13955 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13956 break;
13957 default: break;
13958 }
13959
// An `=` after the name makes this an optional parameter, so the ordering
// state is advanced using the `=` token rather than the name token.
13960 if (parser->current.type == PM_TOKEN_EQUAL) {
13961 update_parameter_state(parser, &parser->current, &order);
13962 } else {
13963 update_parameter_state(parser, &parser->previous, &order);
13964 }
13965
13966 pm_token_t name = parser->previous;
13967 bool repeated = pm_parser_parameter_name_check(parser, &name);
13968 pm_parser_local_add_token(parser, &name, 1);
13969
13970 if (match1(parser, PM_TOKEN_EQUAL)) {
13971 pm_token_t operator = parser->current;
13972 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13973 parser_lex(parser);
13974
// For versions up to CRuby 3.3, remember how many times the parameter has been
// read so that a read inside its own default value can be detected below.
13975 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
13976 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
13977
13978 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
13979 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
13980 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
13981
13982 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
13983
13984 if (repeated) {
13985 pm_node_flag_set_repeated_parameter(UP(param));
13986 }
13987 pm_parameters_node_optionals_append(params, param);
13988
13989 // If the value of the parameter increased the number of
13990 // reads of that parameter, then we need to warn that we
13991 // have a circular definition.
13992 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
13993 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
13994 }
13995
13996 context_pop(parser);
13997
13998 // If parsing the value of the parameter resulted in error recovery,
13999 // then we can put a missing node in its place and stop parsing the
14000 // parameters entirely now.
14001 if (parser->recovering) {
14002 parsing = false;
14003 break;
14004 }
14005 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14006 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14007 if (repeated) {
14008 pm_node_flag_set_repeated_parameter(UP(param));
14009 }
14010 pm_parameters_node_requireds_append(params, UP(param));
14011 } else {
// A plain name once the ordering state has reached AFTER_OPTIONAL or lower is
// stored as a post parameter.
14012 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14013 if (repeated) {
14014 pm_node_flag_set_repeated_parameter(UP(param));
14015 }
14016 pm_parameters_node_posts_append(params, UP(param));
14017 }
14018
14019 break;
14020 }
// A keyword parameter: `name:` (required) or `name: value` (optional).
14021 case PM_TOKEN_LABEL: {
14022 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14023 update_parameter_state(parser, &parser->current, &order);
14024
14025 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14026 parser_lex(parser);
14027
// `local` is the label token with its final byte dropped (presumably the
// trailing `:`); it is what gets name-checked and added to the scope.
14028 pm_token_t name = parser->previous;
14029 pm_token_t local = name;
14030 local.end -= 1;
14031
14032 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14033 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14034 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14035 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14036 }
14037
14038 bool repeated = pm_parser_parameter_name_check(parser, &local);
14039 pm_parser_local_add_token(parser, &local, 1);
14040
// Every branch of this switch pops the PM_CONTEXT_DEFAULT_PARAMS context that
// was pushed above.
14041 switch (parser->current.type) {
14042 case PM_TOKEN_COMMA:
14043 case PM_TOKEN_PARENTHESIS_RIGHT:
14044 case PM_TOKEN_PIPE: {
14045 context_pop(parser);
14046
14047 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14048 if (repeated) {
14049 pm_node_flag_set_repeated_parameter(param);
14050 }
14051
14052 pm_parameters_node_keywords_append(params, param);
14053 break;
14054 }
14055 case PM_TOKEN_SEMICOLON:
14056 case PM_TOKEN_NEWLINE: {
14057 context_pop(parser);
14058
// Inside parentheses the list ends here and no node is created for this label.
14059 if (uses_parentheses) {
14060 parsing = false;
14061 break;
14062 }
14063
14064 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14065 if (repeated) {
14066 pm_node_flag_set_repeated_parameter(param);
14067 }
14068
14069 pm_parameters_node_keywords_append(params, param);
14070 break;
14071 }
14072 default: {
14073 pm_node_t *param;
14074
14075 if (token_begins_expression_p(parser->current.type)) {
14076 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14077 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14078
14079 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14080 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14081 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14082
// Same circular-default check as for optional positional parameters.
14083 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14084 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14085 }
14086
14087 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14088 }
14089 else {
14090 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14091 }
14092
14093 if (repeated) {
14094 pm_node_flag_set_repeated_parameter(param);
14095 }
14096
14097 context_pop(parser);
14098 pm_parameters_node_keywords_append(params, param);
14099
14100 // If parsing the value of the parameter resulted in error recovery,
14101 // then we can put a missing node in its place and stop parsing the
14102 // parameters entirely now.
14103 if (parser->recovering) {
14104 parsing = false;
14105 break;
14106 }
14107 }
14108 }
14109
14110 parser->in_keyword_arg = false;
14111 break;
14112 }
// A rest parameter: `*name`, or an anonymous `*` which marks the scope as
// forwarding its positionals.
14113 case PM_TOKEN_USTAR:
14114 case PM_TOKEN_STAR: {
14115 update_parameter_state(parser, &parser->current, &order);
14116 parser_lex(parser);
14117
14118 pm_token_t operator = parser->previous;
14119 pm_token_t name;
14120 bool repeated = false;
14121
14122 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14123 name = parser->previous;
14124 repeated = pm_parser_parameter_name_check(parser, &name);
14125 pm_parser_local_add_token(parser, &name, 1);
14126 } else {
14127 name = not_provided(parser);
14128 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14129 }
14130
14131 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
14132 if (repeated) {
14133 pm_node_flag_set_repeated_parameter(param);
14134 }
14135
14136 if (params->rest == NULL) {
14137 pm_parameters_node_rest_set(params, param);
14138 } else {
14139 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14140 pm_parameters_node_posts_append(params, param);
14141 }
14142
14143 break;
14144 }
// A keyword rest parameter: `**name`, an anonymous `**` which marks the scope
// as forwarding its keywords, or `**nil`.
14145 case PM_TOKEN_STAR_STAR:
14146 case PM_TOKEN_USTAR_STAR: {
// The ordering state before this token is kept so that `**nil` can be
// rejected when keyword parameters have already been seen.
14147 pm_parameters_order_t previous_order = order;
14148 update_parameter_state(parser, &parser->current, &order);
14149 parser_lex(parser);
14150
14151 pm_token_t operator = parser->previous;
14152 pm_node_t *param;
14153
14154 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14155 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14156 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14157 }
14158
14159 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14160 } else {
14161 pm_token_t name;
14162
14163 bool repeated = false;
14164 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14165 name = parser->previous;
14166 repeated = pm_parser_parameter_name_check(parser, &name);
14167 pm_parser_local_add_token(parser, &name, 1);
14168 } else {
14169 name = not_provided(parser);
14170 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14171 }
14172
14173 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
14174 if (repeated) {
14175 pm_node_flag_set_repeated_parameter(param);
14176 }
14177 }
14178
14179 if (params->keyword_rest == NULL) {
14180 pm_parameters_node_keyword_rest_set(params, param);
14181 } else {
14182 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14183 pm_parameters_node_posts_append(params, param);
14184 }
14185
14186 break;
14187 }
// Any other token ends the parameter list. If the previous token was a comma,
// that comma was a trailing one.
14188 default:
14189 if (parser->previous.type == PM_TOKEN_COMMA) {
14190 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14191 // If we get here, then we have a trailing comma in a
14192 // block parameter list.
14193 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14194
14195 if (params->rest == NULL) {
14196 pm_parameters_node_rest_set(params, param);
14197 } else {
14198 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14199 pm_parameters_node_posts_append(params, UP(param));
14200 }
14201 } else {
14202 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14203 }
14204 }
14205
14206 parsing = false;
14207 break;
14208 }
14209
14210 // If we hit some kind of issue while parsing the parameter, this would
14211 // have been set to false. In that case, we need to break out of the
14212 // loop.
14213 if (!parsing) break;
14214
14215 bool accepted_newline = false;
14216 if (uses_parentheses) {
14217 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14218 }
14219
14220 if (accept1(parser, PM_TOKEN_COMMA)) {
14221 // If there was a comma, but we also accepted a newline, then this
14222 // is a syntax error.
14223 if (accepted_newline) {
14224 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14225 }
14226 } else {
14227 // If there was no comma, then we're done parsing parameters.
14228 break;
14229 }
14230 }
14231
14232 pm_do_loop_stack_pop(parser);
14233
14234 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14235 if (params->base.location.start == params->base.location.end) {
14236 pm_node_destroy(parser, UP(params));
14237 return NULL;
14238 }
14239
14240 return params;
14241}
14242
14247static size_t
14248token_newline_index(const pm_parser_t *parser) {
14249 if (parser->heredoc_end == NULL) {
14250 // This is the common case. In this case we can look at the previously
14251 // recorded newline in the newline list and subtract from the current
14252 // offset.
14253 return parser->newline_list.size - 1;
14254 } else {
14255 // This is unlikely. This is the case that we have already parsed the
14256 // start of a heredoc, so we cannot rely on looking at the previous
14257 // offset of the newline list, and instead must go through the whole
14258 // process of a binary search for the line number.
14259 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14260 }
14261}
14262
14267static int64_t
14268token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14269 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14270 const uint8_t *end = token->start;
14271
14272 // Skip over the BOM if it is present.
14273 if (
14274 newline_index == 0 &&
14275 parser->start[0] == 0xef &&
14276 parser->start[1] == 0xbb &&
14277 parser->start[2] == 0xbf
14278 ) cursor += 3;
14279
14280 int64_t column = 0;
14281 for (; cursor < end; cursor++) {
14282 switch (*cursor) {
14283 case '\t':
14284 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14285 break;
14286 case ' ':
14287 column++;
14288 break;
14289 default:
14290 column++;
14291 if (break_on_non_space) return -1;
14292 break;
14293 }
14294 }
14295
14296 return column;
14297}
14298
14303static void
14304parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14305 // If these warnings are disabled (unlikely), then we can just return.
14306 if (!parser->warn_mismatched_indentation) return;
14307
14308 // If the tokens are on the same line, we do not warn.
14309 size_t closing_newline_index = token_newline_index(parser);
14310 if (opening_newline_index == closing_newline_index) return;
14311
14312 // If the opening token has anything other than spaces or tabs before it,
14313 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14314 // and the `if` immediately follows an `else` keyword.
14315 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14316 if (!if_after_else && (opening_column == -1)) return;
14317
14318 // Get a reference to the closing token off the current parser. This assumes
14319 // that the caller has placed this in the correct position.
14320 pm_token_t *closing_token = &parser->current;
14321
14322 // If the tokens are at the same indentation, we do not warn.
14323 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14324 if ((closing_column == -1) || (opening_column == closing_column)) return;
14325
14326 // If the closing column is greater than the opening column and we are
14327 // allowing indentation, then we do not warn.
14328 if (allow_indent && (closing_column > opening_column)) return;
14329
14330 // Otherwise, add a warning.
14331 PM_PARSER_WARN_FORMAT(
14332 parser,
14333 closing_token->start,
14334 closing_token->end,
14335 PM_WARN_INDENTATION_MISMATCH,
14336 (int) (closing_token->end - closing_token->start),
14337 (const char *) closing_token->start,
14338 (int) (opening_token->end - opening_token->start),
14339 (const char *) opening_token->start,
14340 ((int32_t) opening_newline_index) + parser->start_line
14341 );
14342}
14343
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * It is used to pick the matching parsing context for each clause.
 */
typedef enum {
    PM_RESCUES_BEGIN  = 1,
    PM_RESCUES_BLOCK  = 2,
    PM_RESCUES_CLASS  = 3,
    PM_RESCUES_DEF    = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14353
14358static inline void
14359parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14360 pm_rescue_node_t *current = NULL;
14361
14362 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14363 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14364 parser_lex(parser);
14365
14366 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14367
14368 switch (parser->current.type) {
14369 case PM_TOKEN_EQUAL_GREATER: {
14370 // Here we have an immediate => after the rescue keyword, in which case
14371 // we're going to have an empty list of exceptions to rescue (which
14372 // implies StandardError).
14373 parser_lex(parser);
14374 pm_rescue_node_operator_set(rescue, &parser->previous);
14375
14376 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14377 reference = parse_target(parser, reference, false, false);
14378
14379 pm_rescue_node_reference_set(rescue, reference);
14380 break;
14381 }
14382 case PM_TOKEN_NEWLINE:
14383 case PM_TOKEN_SEMICOLON:
14384 case PM_TOKEN_KEYWORD_THEN:
14385 // Here we have a terminator for the rescue keyword, in which
14386 // case we're going to just continue on.
14387 break;
14388 default: {
14389 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14390 // Here we have something that could be an exception expression, so
14391 // we'll attempt to parse it here and any others delimited by commas.
14392
14393 do {
14394 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14395 pm_rescue_node_exceptions_append(rescue, expression);
14396
14397 // If we hit a newline, then this is the end of the rescue expression. We
14398 // can continue on to parse the statements.
14399 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14400
14401 // If we hit a `=>` then we're going to parse the exception variable. Once
14402 // we've done that, we'll break out of the loop and parse the statements.
14403 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14404 pm_rescue_node_operator_set(rescue, &parser->previous);
14405
14406 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14407 reference = parse_target(parser, reference, false, false);
14408
14409 pm_rescue_node_reference_set(rescue, reference);
14410 break;
14411 }
14412 } while (accept1(parser, PM_TOKEN_COMMA));
14413 }
14414 }
14415 }
14416
14417 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14418 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14419 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14420 }
14421 } else {
14422 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14423 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14424 }
14425
14426 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14427 pm_accepts_block_stack_push(parser, true);
14428 pm_context_t context;
14429
14430 switch (type) {
14431 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14432 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14433 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14434 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14435 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14436 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14437 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14438 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14439 }
14440
14441 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14442 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14443
14444 pm_accepts_block_stack_pop(parser);
14445 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14446 }
14447
14448 if (current == NULL) {
14449 pm_begin_node_rescue_clause_set(parent_node, rescue);
14450 } else {
14451 pm_rescue_node_subsequent_set(current, rescue);
14452 }
14453
14454 current = rescue;
14455 }
14456
14457 // The end node locations on rescue nodes will not be set correctly
14458 // since we won't know the end until we've found all subsequent
14459 // clauses. This sets the end location on all rescues once we know it.
14460 if (current != NULL) {
14461 const uint8_t *end_to_set = current->base.location.end;
14462 pm_rescue_node_t *clause = parent_node->rescue_clause;
14463
14464 while (clause != NULL) {
14465 clause->base.location.end = end_to_set;
14466 clause = clause->subsequent;
14467 }
14468 }
14469
14470 pm_token_t else_keyword;
14471 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14472 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14473 opening_newline_index = token_newline_index(parser);
14474
14475 else_keyword = parser->current;
14476 opening = &else_keyword;
14477
14478 parser_lex(parser);
14479 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14480
14481 pm_statements_node_t *else_statements = NULL;
14482 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14483 pm_accepts_block_stack_push(parser, true);
14484 pm_context_t context;
14485
14486 switch (type) {
14487 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14488 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14489 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14490 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14491 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14492 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14493 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14494 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14495 }
14496
14497 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14498 pm_accepts_block_stack_pop(parser);
14499
14500 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14501 }
14502
14503 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14504 pm_begin_node_else_clause_set(parent_node, else_clause);
14505
14506 // If we don't have a `current` rescue node, then this is a dangling
14507 // else, and it's an error.
14508 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14509 }
14510
14511 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14512 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14513 pm_token_t ensure_keyword = parser->current;
14514
14515 parser_lex(parser);
14516 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14517
14518 pm_statements_node_t *ensure_statements = NULL;
14519 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14520 pm_accepts_block_stack_push(parser, true);
14521 pm_context_t context;
14522
14523 switch (type) {
14524 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14525 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14526 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14527 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14528 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14529 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14530 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14531 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14532 }
14533
14534 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14535 pm_accepts_block_stack_pop(parser);
14536
14537 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14538 }
14539
14540 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14541 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14542 }
14543
14544 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14545 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14546 pm_begin_node_end_keyword_set(parent_node, &parser->current);
14547 } else {
14548 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
14549 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
14550 }
14551}
14552
14557static pm_begin_node_t *
14558parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14559 pm_token_t begin_keyword = not_provided(parser);
14560 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
14561
14562 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14563 node->base.location.start = start;
14564
14565 return node;
14566}
14567
14572parse_block_parameters(
14573 pm_parser_t *parser,
14574 bool allows_trailing_comma,
14575 const pm_token_t *opening,
14576 bool is_lambda_literal,
14577 bool accepts_blocks_in_defaults,
14578 uint16_t depth
14579) {
14580 pm_parameters_node_t *parameters = NULL;
14581 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14582 if (!is_lambda_literal) {
14583 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14584 }
14585 parameters = parse_parameters(
14586 parser,
14587 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14588 false,
14589 allows_trailing_comma,
14590 false,
14591 accepts_blocks_in_defaults,
14592 true,
14593 (uint16_t) (depth + 1)
14594 );
14595 if (!is_lambda_literal) {
14596 context_pop(parser);
14597 }
14598 }
14599
14600 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14601 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
14602 accept1(parser, PM_TOKEN_NEWLINE);
14603
14604 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14605 do {
14606 switch (parser->current.type) {
14607 case PM_TOKEN_CONSTANT:
14608 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14609 parser_lex(parser);
14610 break;
14611 case PM_TOKEN_INSTANCE_VARIABLE:
14612 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14613 parser_lex(parser);
14614 break;
14615 case PM_TOKEN_GLOBAL_VARIABLE:
14616 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14617 parser_lex(parser);
14618 break;
14619 case PM_TOKEN_CLASS_VARIABLE:
14620 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14621 parser_lex(parser);
14622 break;
14623 default:
14624 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14625 break;
14626 }
14627
14628 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14629 pm_parser_local_add_token(parser, &parser->previous, 1);
14630
14631 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14632 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14633
14634 pm_block_parameters_node_append_local(block_parameters, local);
14635 } while (accept1(parser, PM_TOKEN_COMMA));
14636 }
14637 }
14638
14639 return block_parameters;
14640}
14641
14646static bool
14647outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14648 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14649 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14650 }
14651
14652 return false;
14653}
14654
/**
 * The names of the numbered parameters. The entry at index i is the name of
 * numbered parameter i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1", "_2", "_3",
    "_4", "_5", "_6",
    "_7", "_8", "_9"
};
14663
14669static pm_node_t *
14670parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14671 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14672
14673 // If we have ordinary parameters, then we will return them as the set of
14674 // parameters.
14675 if (parameters != NULL) {
14676 // If we also have implicit parameters, then this is an error.
14677 if (implicit_parameters->size > 0) {
14678 pm_node_t *node = implicit_parameters->nodes[0];
14679
14680 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14681 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14682 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14683 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14684 } else {
14685 assert(false && "unreachable");
14686 }
14687 }
14688
14689 return parameters;
14690 }
14691
14692 // If we don't have any implicit parameters, then the set of parameters is
14693 // NULL.
14694 if (implicit_parameters->size == 0) {
14695 return NULL;
14696 }
14697
14698 // If we don't have ordinary parameters, then we now must validate our set
14699 // of implicit parameters. We can only have numbered parameters or it, but
14700 // they cannot be mixed.
14701 uint8_t numbered_parameter = 0;
14702 bool it_parameter = false;
14703
14704 for (size_t index = 0; index < implicit_parameters->size; index++) {
14705 pm_node_t *node = implicit_parameters->nodes[index];
14706
14707 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14708 if (it_parameter) {
14709 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14710 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14711 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14712 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14713 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14714 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14715 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14716 } else {
14717 assert(false && "unreachable");
14718 }
14719 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14720 if (numbered_parameter > 0) {
14721 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14722 } else {
14723 it_parameter = true;
14724 }
14725 }
14726 }
14727
14728 if (numbered_parameter > 0) {
14729 // Go through the parent scopes and mark them as being disallowed from
14730 // using numbered parameters because this inner scope is using them.
14731 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14732 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14733 }
14734
14735 const pm_location_t location = { .start = opening->start, .end = closing->end };
14736 return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
14737 }
14738
14739 if (it_parameter) {
14740 return UP(pm_it_parameters_node_create(parser, opening, closing));
14741 }
14742
14743 return NULL;
14744}
14745
14749static pm_block_node_t *
14750parse_block(pm_parser_t *parser, uint16_t depth) {
14751 pm_token_t opening = parser->previous;
14752 accept1(parser, PM_TOKEN_NEWLINE);
14753
14754 pm_accepts_block_stack_push(parser, true);
14755 pm_parser_scope_push(parser, false);
14756
14757 pm_block_parameters_node_t *block_parameters = NULL;
14758
14759 if (accept1(parser, PM_TOKEN_PIPE)) {
14760 pm_token_t block_parameters_opening = parser->previous;
14761 if (match1(parser, PM_TOKEN_PIPE)) {
14762 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14763 parser->command_start = true;
14764 parser_lex(parser);
14765 } else {
14766 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14767 accept1(parser, PM_TOKEN_NEWLINE);
14768 parser->command_start = true;
14769 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14770 }
14771
14772 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
14773 }
14774
14775 accept1(parser, PM_TOKEN_NEWLINE);
14776 pm_node_t *statements = NULL;
14777
14778 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14779 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14780 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14781 }
14782
14783 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
14784 } else {
14785 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14786 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14787 pm_accepts_block_stack_push(parser, true);
14788 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14789 pm_accepts_block_stack_pop(parser);
14790 }
14791
14792 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14793 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14794 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14795 }
14796 }
14797
14798 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
14799 }
14800
14801 pm_constant_id_list_t locals;
14802 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14803 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14804
14805 pm_parser_scope_pop(parser);
14806 pm_accepts_block_stack_pop(parser);
14807
14808 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14809}
14810
14816static bool
14817parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14818 bool found = false;
14819
14820 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14821 found |= true;
14822 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14823
14824 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14825 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14826 } else {
14827 pm_accepts_block_stack_push(parser, true);
14828 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14829
14830 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14831 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14832 parser->previous.start = parser->previous.end;
14833 parser->previous.type = PM_TOKEN_MISSING;
14834 }
14835
14836 pm_accepts_block_stack_pop(parser);
14837 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14838 }
14839 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14840 found |= true;
14841 pm_accepts_block_stack_push(parser, false);
14842
14843 // If we get here, then the subsequent token cannot be used as an infix
14844 // operator. In this case we assume the subsequent token is part of an
14845 // argument to this method call.
14846 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14847
14848 // If we have done with the arguments and still not consumed the comma,
14849 // then we have a trailing comma where we need to check whether it is
14850 // allowed or not.
14851 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14852 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14853 }
14854
14855 pm_accepts_block_stack_pop(parser);
14856 }
14857
14858 // If we're at the end of the arguments, we can now check if there is a block
14859 // node that starts with a {. If there is, then we can parse it and add it to
14860 // the arguments.
14861 if (accepts_block) {
14862 pm_block_node_t *block = NULL;
14863
14864 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14865 found |= true;
14866 block = parse_block(parser, (uint16_t) (depth + 1));
14867 pm_arguments_validate_block(parser, arguments, block);
14868 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14869 found |= true;
14870 block = parse_block(parser, (uint16_t) (depth + 1));
14871 }
14872
14873 if (block != NULL) {
14874 if (arguments->block == NULL && !arguments->has_forwarding) {
14875 arguments->block = UP(block);
14876 } else {
14877 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14878
14879 if (arguments->block != NULL) {
14880 if (arguments->arguments == NULL) {
14881 arguments->arguments = pm_arguments_node_create(parser);
14882 }
14883 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
14884 }
14885 arguments->block = UP(block);
14886 }
14887 }
14888 }
14889
14890 return found;
14891}
14892
14897static void
14898parse_return(pm_parser_t *parser, pm_node_t *node) {
14899 bool in_sclass = false;
14900 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14901 switch (context_node->context) {
14905 case PM_CONTEXT_BEGIN:
14906 case PM_CONTEXT_CASE_IN:
14909 case PM_CONTEXT_DEFINED:
14910 case PM_CONTEXT_ELSE:
14911 case PM_CONTEXT_ELSIF:
14912 case PM_CONTEXT_EMBEXPR:
14914 case PM_CONTEXT_FOR:
14915 case PM_CONTEXT_IF:
14917 case PM_CONTEXT_MAIN:
14919 case PM_CONTEXT_PARENS:
14920 case PM_CONTEXT_POSTEXE:
14922 case PM_CONTEXT_PREEXE:
14924 case PM_CONTEXT_TERNARY:
14925 case PM_CONTEXT_UNLESS:
14926 case PM_CONTEXT_UNTIL:
14927 case PM_CONTEXT_WHILE:
14928 // Keep iterating up the lists of contexts, because returns can
14929 // see through these.
14930 continue;
14934 case PM_CONTEXT_SCLASS:
14935 in_sclass = true;
14936 continue;
14940 case PM_CONTEXT_CLASS:
14944 case PM_CONTEXT_MODULE:
14945 // These contexts are invalid for a return.
14946 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14947 return;
14958 case PM_CONTEXT_DEF:
14964 // These contexts are valid for a return, and we should not
14965 // continue to loop.
14966 return;
14967 case PM_CONTEXT_NONE:
14968 // This case should never happen.
14969 assert(false && "unreachable");
14970 break;
14971 }
14972 }
14973 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14974 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14975 }
14976}
14977
14982static void
14983parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
14984 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14985 switch (context_node->context) {
14992 case PM_CONTEXT_DEFINED:
14993 case PM_CONTEXT_FOR:
15000 case PM_CONTEXT_POSTEXE:
15001 case PM_CONTEXT_UNTIL:
15002 case PM_CONTEXT_WHILE:
15003 // These are the good cases. We're allowed to have a block exit
15004 // in these contexts.
15005 return;
15006 case PM_CONTEXT_DEF:
15011 case PM_CONTEXT_MAIN:
15012 case PM_CONTEXT_PREEXE:
15013 case PM_CONTEXT_SCLASS:
15017 // These are the bad cases. We're not allowed to have a block
15018 // exit in these contexts.
15019 //
15020 // If we get here, then we're about to mark this block exit
15021 // as invalid. However, it could later _become_ valid if we
15022 // find a trailing while/until on the expression. In this
15023 // case instead of adding the error here, we'll add the
15024 // block exit to the list of exits for the expression, and
15025 // the node parsing will handle validating it instead.
15026 assert(parser->current_block_exits != NULL);
15027 pm_node_list_append(parser->current_block_exits, node);
15028 return;
15032 case PM_CONTEXT_BEGIN:
15033 case PM_CONTEXT_CASE_IN:
15038 case PM_CONTEXT_CLASS:
15040 case PM_CONTEXT_ELSE:
15041 case PM_CONTEXT_ELSIF:
15042 case PM_CONTEXT_EMBEXPR:
15044 case PM_CONTEXT_IF:
15048 case PM_CONTEXT_MODULE:
15050 case PM_CONTEXT_PARENS:
15053 case PM_CONTEXT_TERNARY:
15054 case PM_CONTEXT_UNLESS:
15055 // In these contexts we should continue walking up the list of
15056 // contexts.
15057 break;
15058 case PM_CONTEXT_NONE:
15059 // This case should never happen.
15060 assert(false && "unreachable");
15061 break;
15062 }
15063 }
15064}
15065
15070static pm_node_list_t *
15071push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15072 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15073 parser->current_block_exits = current_block_exits;
15074 return previous_block_exits;
15075}
15076
15082static void
15083flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15084 pm_node_t *block_exit;
15085 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15086 const char *type;
15087
15088 switch (PM_NODE_TYPE(block_exit)) {
15089 case PM_BREAK_NODE: type = "break"; break;
15090 case PM_NEXT_NODE: type = "next"; break;
15091 case PM_REDO_NODE: type = "redo"; break;
15092 default: assert(false && "unreachable"); type = ""; break;
15093 }
15094
15095 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15096 }
15097
15098 parser->current_block_exits = previous_block_exits;
15099}
15100
15105static void
15106pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15107 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15108 // If we matched a trailing while/until, then all of the block exits in
15109 // the contained list are valid. In this case we do not need to do
15110 // anything.
15111 parser->current_block_exits = previous_block_exits;
15112 } else if (previous_block_exits != NULL) {
15113 // If we did not matching a trailing while/until, then all of the block
15114 // exits contained in the list are invalid for this specific context.
15115 // However, they could still become valid in a higher level context if
15116 // there is another list above this one. In this case we'll push all of
15117 // the block exits up to the previous list.
15118 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15119 parser->current_block_exits = previous_block_exits;
15120 } else {
15121 // If we did not match a trailing while/until and this was the last
15122 // chance to do so, then all of the block exits in the list are invalid
15123 // and we need to add an error for each of them.
15124 flush_block_exits(parser, previous_block_exits);
15125 }
15126}
15127
15128static inline pm_node_t *
15129parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15130 context_push(parser, PM_CONTEXT_PREDICATE);
15131 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15132 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15133
15134 // Predicates are closed by a term, a "then", or a term and then a "then".
15135 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15136
15137 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15138 predicate_closed = true;
15139 *then_keyword = parser->previous;
15140 }
15141
15142 if (!predicate_closed) {
15143 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15144 }
15145
15146 context_pop(parser);
15147 return predicate;
15148}
15149
15150static inline pm_node_t *
15151parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15152 pm_node_list_t current_block_exits = { 0 };
15153 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15154
15155 pm_token_t keyword = parser->previous;
15156 pm_token_t then_keyword = not_provided(parser);
15157
15158 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15159 pm_statements_node_t *statements = NULL;
15160
15161 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15162 pm_accepts_block_stack_push(parser, true);
15163 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15164 pm_accepts_block_stack_pop(parser);
15165 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15166 }
15167
15168 pm_token_t end_keyword = not_provided(parser);
15169 pm_node_t *parent = NULL;
15170
15171 switch (context) {
15172 case PM_CONTEXT_IF:
15173 parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15174 break;
15175 case PM_CONTEXT_UNLESS:
15176 parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
15177 break;
15178 default:
15179 assert(false && "unreachable");
15180 break;
15181 }
15182
15183 pm_node_t *current = parent;
15184
15185 // Parse any number of elsif clauses. This will form a linked list of if
15186 // nodes pointing to each other from the top.
15187 if (context == PM_CONTEXT_IF) {
15188 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15189 if (parser_end_of_line_p(parser)) {
15190 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15191 }
15192
15193 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15194 pm_token_t elsif_keyword = parser->current;
15195 parser_lex(parser);
15196
15197 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15198 pm_accepts_block_stack_push(parser, true);
15199
15200 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15201 pm_accepts_block_stack_pop(parser);
15202 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15203
15204 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15205 ((pm_if_node_t *) current)->subsequent = elsif;
15206 current = elsif;
15207 }
15208 }
15209
15210 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15211 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15212 opening_newline_index = token_newline_index(parser);
15213
15214 parser_lex(parser);
15215 pm_token_t else_keyword = parser->previous;
15216
15217 pm_accepts_block_stack_push(parser, true);
15218 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15219 pm_accepts_block_stack_pop(parser);
15220
15221 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15222 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15223 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15224
15225 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15226
15227 switch (context) {
15228 case PM_CONTEXT_IF:
15229 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15230 break;
15231 case PM_CONTEXT_UNLESS:
15232 ((pm_unless_node_t *) parent)->else_clause = else_node;
15233 break;
15234 default:
15235 assert(false && "unreachable");
15236 break;
15237 }
15238 } else {
15239 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15240 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15241 }
15242
15243 // Set the appropriate end location for all of the nodes in the subtree.
15244 switch (context) {
15245 case PM_CONTEXT_IF: {
15246 pm_node_t *current = parent;
15247 bool recursing = true;
15248
15249 while (recursing) {
15250 switch (PM_NODE_TYPE(current)) {
15251 case PM_IF_NODE:
15252 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15253 current = ((pm_if_node_t *) current)->subsequent;
15254 recursing = current != NULL;
15255 break;
15256 case PM_ELSE_NODE:
15257 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15258 recursing = false;
15259 break;
15260 default: {
15261 recursing = false;
15262 break;
15263 }
15264 }
15265 }
15266 break;
15267 }
15268 case PM_CONTEXT_UNLESS:
15269 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15270 break;
15271 default:
15272 assert(false && "unreachable");
15273 break;
15274 }
15275
15276 pop_block_exits(parser, previous_block_exits);
15277 pm_node_list_free(&current_block_exits);
15278
15279 return parent;
15280}
15281
/**
 * Expands to the case labels for every keyword token type. It is meant to be
 * used inside a switch statement as `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15297
/**
 * Expands to the case labels for every operator token type. It is meant to be
 * used inside a switch statement as `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15310
/**
 * Expands to the case labels for the token types listed below: numeric
 * literals, the tokens that open symbols, regular expressions, strings, xstrings,
 * %-lists and heredocs, a handful of keyword literals, the lambda arrow, the
 * unary minus on a number, and character literals. It is meant to be used
 * inside a switch statement as `case PM_CASE_PRIMITIVE:`.
 *
 * NOTE(review): judging by the name these are the tokens that can begin a
 * "primitive" value; confirm against the call sites.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15325
/**
 * Expands to the case labels for the token types listed below. It is meant to
 * be used inside a switch statement as `case PM_CASE_PARAMETER:`.
 *
 * NOTE(review): judging by the name these are the tokens that can begin a
 * parameter; confirm against the call sites.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15334
/**
 * Expands to the case labels for the node types listed below (variable,
 * constant, and reference reads, plus multi targets). It is meant to be used
 * inside a switch statement as `case PM_CASE_WRITABLE:`.
 *
 * NOTE(review): judging by the name these are the nodes that can be turned
 * into write targets; confirm against the call sites.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15343
// Assert that the forced-UTF-8 flag has the same value for string nodes as it
// does in the generic encoding flags, so that the type of a node can be
// switched safely without having to rewrite its flags.
15346PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15347
15352static inline pm_node_flags_t
15353parse_unescaped_encoding(const pm_parser_t *parser) {
15354 if (parser->explicit_encoding != NULL) {
15356 // If the there's an explicit encoding and it's using a UTF-8 escape
15357 // sequence, then mark the string as UTF-8.
15358 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15359 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15360 // If there's a non-UTF-8 escape sequence being used, then the
15361 // string uses the source encoding, unless the source is marked as
15362 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15363 // order to keep the string valid.
15364 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15365 }
15366 }
15367 return 0;
15368}
15369
15374static pm_node_t *
15375parse_string_part(pm_parser_t *parser, uint16_t depth) {
15376 switch (parser->current.type) {
15377 // Here the lexer has returned to us plain string content. In this case
15378 // we'll create a string node that has no opening or closing and return that
15379 // as the part. These kinds of parts look like:
15380 //
15381 // "aaa #{bbb} #@ccc ddd"
15382 // ^^^^ ^ ^^^^
15383 case PM_TOKEN_STRING_CONTENT: {
15384 pm_token_t opening = not_provided(parser);
15385 pm_token_t closing = not_provided(parser);
15386
15387 pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
15388 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15389
15390 parser_lex(parser);
15391 return node;
15392 }
15393 // Here the lexer has returned the beginning of an embedded expression. In
15394 // that case we'll parse the inner statements and return that as the part.
15395 // These kinds of parts look like:
15396 //
15397 // "aaa #{bbb} #@ccc ddd"
15398 // ^^^^^^
15399 case PM_TOKEN_EMBEXPR_BEGIN: {
15400 // Ruby disallows seeing encoding around interpolation in strings,
15401 // even though it is known at parse time.
15402 parser->explicit_encoding = NULL;
15403
15404 pm_lex_state_t state = parser->lex_state;
15405 int brace_nesting = parser->brace_nesting;
15406
15407 parser->brace_nesting = 0;
15408 lex_state_set(parser, PM_LEX_STATE_BEG);
15409 parser_lex(parser);
15410
15411 pm_token_t opening = parser->previous;
15412 pm_statements_node_t *statements = NULL;
15413
15414 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
15415 pm_accepts_block_stack_push(parser, true);
15416 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15417 pm_accepts_block_stack_pop(parser);
15418 }
15419
15420 parser->brace_nesting = brace_nesting;
15421 lex_state_set(parser, state);
15422
15423 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15424 pm_token_t closing = parser->previous;
15425
15426 // If this set of embedded statements only contains a single
15427 // statement, then Ruby does not consider it as a possible statement
15428 // that could emit a line event.
15429 if (statements != NULL && statements->body.size == 1) {
15430 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15431 }
15432
15433 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
15434 }
15435
15436 // Here the lexer has returned the beginning of an embedded variable.
15437 // In that case we'll parse the variable and create an appropriate node
15438 // for it and then return that node. These kinds of parts look like:
15439 //
15440 // "aaa #{bbb} #@ccc ddd"
15441 // ^^^^^
15442 case PM_TOKEN_EMBVAR: {
15443 // Ruby disallows seeing encoding around interpolation in strings,
15444 // even though it is known at parse time.
15445 parser->explicit_encoding = NULL;
15446
15447 lex_state_set(parser, PM_LEX_STATE_BEG);
15448 parser_lex(parser);
15449
15450 pm_token_t operator = parser->previous;
15451 pm_node_t *variable;
15452
15453 switch (parser->current.type) {
15454 // In this case a back reference is being interpolated. We'll
15455 // create a global variable read node.
15456 case PM_TOKEN_BACK_REFERENCE:
15457 parser_lex(parser);
15458 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15459 break;
15460 // In this case an nth reference is being interpolated. We'll
15461 // create a global variable read node.
15462 case PM_TOKEN_NUMBERED_REFERENCE:
15463 parser_lex(parser);
15464 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15465 break;
15466 // In this case a global variable is being interpolated. We'll
15467 // create a global variable read node.
15468 case PM_TOKEN_GLOBAL_VARIABLE:
15469 parser_lex(parser);
15470 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15471 break;
15472 // In this case an instance variable is being interpolated.
15473 // We'll create an instance variable read node.
15474 case PM_TOKEN_INSTANCE_VARIABLE:
15475 parser_lex(parser);
15476 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15477 break;
15478 // In this case a class variable is being interpolated. We'll
15479 // create a class variable read node.
15480 case PM_TOKEN_CLASS_VARIABLE:
15481 parser_lex(parser);
15482 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15483 break;
15484 // We can hit here if we got an invalid token. In that case
15485 // we'll not attempt to lex this token and instead just return a
15486 // missing node.
15487 default:
15488 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15489 variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15490 break;
15491 }
15492
15493 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15494 }
15495 default:
15496 parser_lex(parser);
15497 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15498 return NULL;
15499 }
15500}
15501
15507static const uint8_t *
15508parse_operator_symbol_name(const pm_token_t *name) {
15509 switch (name->type) {
15510 case PM_TOKEN_TILDE:
15511 case PM_TOKEN_BANG:
15512 if (name->end[-1] == '@') return name->end - 1;
15514 default:
15515 return name->end;
15516 }
15517}
15518
15519static pm_node_t *
15520parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15521 pm_token_t closing = not_provided(parser);
15522 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
15523
15524 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15525
15526 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15527 parser_lex(parser);
15528
15529 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15530 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15531
15532 return UP(symbol);
15533}
15534
15540static pm_node_t *
15541parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15542 const pm_token_t opening = parser->previous;
15543
15544 if (lex_mode->mode != PM_LEX_STRING) {
15545 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15546
15547 switch (parser->current.type) {
15548 case PM_CASE_OPERATOR:
15549 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15550 case PM_TOKEN_IDENTIFIER:
15551 case PM_TOKEN_CONSTANT:
15552 case PM_TOKEN_INSTANCE_VARIABLE:
15553 case PM_TOKEN_METHOD_NAME:
15554 case PM_TOKEN_CLASS_VARIABLE:
15555 case PM_TOKEN_GLOBAL_VARIABLE:
15556 case PM_TOKEN_NUMBERED_REFERENCE:
15557 case PM_TOKEN_BACK_REFERENCE:
15558 case PM_CASE_KEYWORD:
15559 parser_lex(parser);
15560 break;
15561 default:
15562 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15563 break;
15564 }
15565
15566 pm_token_t closing = not_provided(parser);
15567 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15568
15569 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15570 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15571
15572 return UP(symbol);
15573 }
15574
15575 if (lex_mode->as.string.interpolation) {
15576 // If we have the end of the symbol, then we can return an empty symbol.
15577 if (match1(parser, PM_TOKEN_STRING_END)) {
15578 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15579 parser_lex(parser);
15580
15581 pm_token_t content = not_provided(parser);
15582 pm_token_t closing = parser->previous;
15583 return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
15584 }
15585
15586 // Now we can parse the first part of the symbol.
15587 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15588
15589 // If we got a string part, then it's possible that we could transform
15590 // what looks like an interpolated symbol into a regular symbol.
15591 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15592 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15593 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15594
15595 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15596 }
15597
15598 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15599 if (part) pm_interpolated_symbol_node_append(symbol, part);
15600
15601 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15602 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15603 pm_interpolated_symbol_node_append(symbol, part);
15604 }
15605 }
15606
15607 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15608 if (match1(parser, PM_TOKEN_EOF)) {
15609 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15610 } else {
15611 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15612 }
15613
15614 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15615 return UP(symbol);
15616 }
15617
15618 pm_token_t content;
15619 pm_string_t unescaped;
15620
15621 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15622 content = parser->current;
15623 unescaped = parser->current_string;
15624 parser_lex(parser);
15625
15626 // If we have two string contents in a row, then the content of this
15627 // symbol is split because of heredoc contents. This looks like:
15628 //
15629 // <<A; :'a
15630 // A
15631 // b'
15632 //
15633 // In this case, the best way we have to represent this is as an
15634 // interpolated string node, so that's what we'll do here.
15635 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15636 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15637 pm_token_t bounds = not_provided(parser);
15638
15639 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
15640 pm_interpolated_symbol_node_append(symbol, part);
15641
15642 part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
15643 pm_interpolated_symbol_node_append(symbol, part);
15644
15645 if (next_state != PM_LEX_STATE_NONE) {
15646 lex_state_set(parser, next_state);
15647 }
15648
15649 parser_lex(parser);
15650 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15651
15652 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15653 return UP(symbol);
15654 }
15655 } else {
15656 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15657 pm_string_shared_init(&unescaped, content.start, content.end);
15658 }
15659
15660 if (next_state != PM_LEX_STATE_NONE) {
15661 lex_state_set(parser, next_state);
15662 }
15663
15664 if (match1(parser, PM_TOKEN_EOF)) {
15665 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15666 } else {
15667 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15668 }
15669
15670 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15671}
15672
15677static inline pm_node_t *
15678parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15679 switch (parser->current.type) {
15680 case PM_CASE_OPERATOR: {
15681 const pm_token_t opening = not_provided(parser);
15682 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
15683 }
15684 case PM_CASE_KEYWORD:
15685 case PM_TOKEN_CONSTANT:
15686 case PM_TOKEN_IDENTIFIER:
15687 case PM_TOKEN_METHOD_NAME: {
15688 parser_lex(parser);
15689
15690 pm_token_t opening = not_provided(parser);
15691 pm_token_t closing = not_provided(parser);
15692 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15693
15694 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15695 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15696
15697 return UP(symbol);
15698 }
15699 case PM_TOKEN_SYMBOL_BEGIN: {
15700 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15701 parser_lex(parser);
15702
15703 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15704 }
15705 default:
15706 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15707 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15708 }
15709}
15710
15717static inline pm_node_t *
15718parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15719 switch (parser->current.type) {
15720 case PM_CASE_OPERATOR: {
15721 const pm_token_t opening = not_provided(parser);
15722 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15723 }
15724 case PM_CASE_KEYWORD:
15725 case PM_TOKEN_CONSTANT:
15726 case PM_TOKEN_IDENTIFIER:
15727 case PM_TOKEN_METHOD_NAME: {
15728 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15729 parser_lex(parser);
15730
15731 pm_token_t opening = not_provided(parser);
15732 pm_token_t closing = not_provided(parser);
15733 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15734
15735 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15736 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15737
15738 return UP(symbol);
15739 }
15740 case PM_TOKEN_SYMBOL_BEGIN: {
15741 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15742 parser_lex(parser);
15743
15744 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15745 }
15746 case PM_TOKEN_BACK_REFERENCE:
15747 parser_lex(parser);
15748 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15749 case PM_TOKEN_NUMBERED_REFERENCE:
15750 parser_lex(parser);
15751 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15752 case PM_TOKEN_GLOBAL_VARIABLE:
15753 parser_lex(parser);
15754 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15755 default:
15756 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15757 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15758 }
15759}
15760
15765static pm_node_t *
15766parse_variable(pm_parser_t *parser) {
15767 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15768 int depth;
15769 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
15770
15771 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15772 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15773 }
15774
15775 pm_scope_t *current_scope = parser->current_scope;
15776 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15777 if (is_numbered_param) {
15778 // When you use a numbered parameter, it implies the existence of
15779 // all of the locals that exist before it. For example, referencing
15780 // _2 means that _1 must exist. Therefore here we loop through all
15781 // of the possibilities and add them into the constant pool.
15782 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15783 for (uint8_t number = 1; number <= maximum; number++) {
15784 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15785 }
15786
15787 if (!match1(parser, PM_TOKEN_EQUAL)) {
15788 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15789 }
15790
15791 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15792 pm_node_list_append(&current_scope->implicit_parameters, node);
15793
15794 return node;
15795 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15796 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15797 pm_node_list_append(&current_scope->implicit_parameters, node);
15798
15799 return node;
15800 }
15801 }
15802
15803 return NULL;
15804}
15805
15809static pm_node_t *
15810parse_variable_call(pm_parser_t *parser) {
15811 pm_node_flags_t flags = 0;
15812
15813 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15814 pm_node_t *node = parse_variable(parser);
15815 if (node != NULL) return node;
15816 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15817 }
15818
15819 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15820 pm_node_flag_set(UP(node), flags);
15821
15822 return UP(node);
15823}
15824
15830static inline pm_token_t
15831parse_method_definition_name(pm_parser_t *parser) {
15832 switch (parser->current.type) {
15833 case PM_CASE_KEYWORD:
15834 case PM_TOKEN_CONSTANT:
15835 case PM_TOKEN_METHOD_NAME:
15836 parser_lex(parser);
15837 return parser->previous;
15838 case PM_TOKEN_IDENTIFIER:
15839 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
15840 parser_lex(parser);
15841 return parser->previous;
15842 case PM_CASE_OPERATOR:
15843 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15844 parser_lex(parser);
15845 return parser->previous;
15846 default:
15847 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15848 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
15849 }
15850}
15851
15852static void
15853parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
15854 // Get a reference to the string struct that is being held by the string
15855 // node. This is the value we're going to actually manipulate.
15856 pm_string_ensure_owned(string);
15857
15858 // Now get the bounds of the existing string. We'll use this as a
15859 // destination to move bytes into. We'll also use it for bounds checking
15860 // since we don't require that these strings be null terminated.
15861 size_t dest_length = pm_string_length(string);
15862 const uint8_t *source_cursor = (uint8_t *) string->source;
15863 const uint8_t *source_end = source_cursor + dest_length;
15864
15865 // We're going to move bytes backward in the string when we get leading
15866 // whitespace, so we'll maintain a pointer to the current position in the
15867 // string that we're writing to.
15868 size_t trimmed_whitespace = 0;
15869
15870 // While we haven't reached the amount of common whitespace that we need to
15871 // trim and we haven't reached the end of the string, we'll keep trimming
15872 // whitespace. Trimming in this context means skipping over these bytes such
15873 // that they aren't copied into the new string.
15874 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15875 if (*source_cursor == '\t') {
15876 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15877 if (trimmed_whitespace > common_whitespace) break;
15878 } else {
15879 trimmed_whitespace++;
15880 }
15881
15882 source_cursor++;
15883 dest_length--;
15884 }
15885
15886 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
15887 string->length = dest_length;
15888}
15889
15893static void
15894parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15895 // The next node should be dedented if it's the first node in the list or if
15896 // it follows a string node.
15897 bool dedent_next = true;
15898
15899 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15900 // keep around two indices: a read and a write. If we end up trimming all of
15901 // the whitespace from a node, then we'll drop it from the list entirely.
15902 size_t write_index = 0;
15903
15904 pm_node_t *node;
15905 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15906 // We're not manipulating child nodes that aren't strings. In this case
15907 // we'll skip past it and indicate that the subsequent node should not
15908 // be dedented.
15909 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15910 nodes->nodes[write_index++] = node;
15911 dedent_next = false;
15912 continue;
15913 }
15914
15915 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15916 if (dedent_next) {
15917 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
15918 }
15919
15920 if (string_node->unescaped.length == 0) {
15921 pm_node_destroy(parser, node);
15922 } else {
15923 nodes->nodes[write_index++] = node;
15924 }
15925
15926 // We always dedent the next node if it follows a string node.
15927 dedent_next = true;
15928 }
15929
15930 nodes->size = write_index;
15931}
15932
15936static pm_token_t
15937parse_strings_empty_content(const uint8_t *location) {
15938 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15939}
15940
/**
 * Parse a run of adjacent string literals, each introduced by a
 * PM_TOKEN_STRING_BEGIN token (the current token must be one on entry).
 *
 * Each literal becomes a string node, a symbol node (when it is closed by
 * PM_TOKEN_LABEL_END), or an interpolated string/symbol node. When more than
 * one node is involved they are gathered into a single interpolated string
 * node created with not-provided bounds; any participant that is not a string
 * or interpolated string is reported with PM_ERR_STRING_CONCATENATION.
 *
 * @param parser The parser.
 * @param current A node parsed before this call that the literals should be
 *     concatenated onto, or NULL. When it is NULL and the first literal turns
 *     out to be a symbol or interpolated symbol, that node is returned
 *     immediately without looking for further literals.
 * @param accepts_label Whether the caller permits a label here. A label is
 *     only allowed when both this flag and the lex mode's label_allowed flag
 *     are set; otherwise PM_ERR_UNEXPECTED_LABEL is reported on the symbol
 *     node, which is still produced.
 * @param depth The current nesting depth; depth + 1 is forwarded to
 *     parse_string_part.
 * @return The resulting node (the last value of current).
 */
15944static inline pm_node_t *
15945parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15946 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
    // Becomes true once current has been wrapped in the interpolated string
    // container used for concatenation, so the wrapping only happens once.
15947 bool concating = false;
15948
15949 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15950 pm_node_t *node = NULL;
15951
15952 // Here we have found a string literal. We'll parse it and add it to
15953 // the list of strings.
15954 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15955 assert(lex_mode->mode == PM_LEX_STRING);
15956 bool lex_interpolation = lex_mode->as.string.interpolation;
15957 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15958
15959 pm_token_t opening = parser->current;
15960 parser_lex(parser);
15961
15962 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15963 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15964 // If we get here, then we have an end immediately after a
15965 // start. In that case we'll create an empty content token and
15966 // return an uninterpolated string.
15967 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15968 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15969
15970 pm_string_shared_init(&string->unescaped, content.start, content.end);
15971 node = UP(string);
15972 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15973 // If we get here, then we have an end of a label immediately
15974 // after a start. In that case we'll create an empty symbol
15975 // node.
15976 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15977 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
15978
15979 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
15980 node = UP(symbol);
15981
15982 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15983 } else if (!lex_interpolation) {
15984 // If we don't accept interpolation then we expect the string to
15985 // start with a single string content node.
15986 pm_string_t unescaped;
15987 pm_token_t content;
15988
15989 if (match1(parser, PM_TOKEN_EOF)) {
15990 unescaped = PM_STRING_EMPTY;
15991 content = not_provided(parser);
15992 } else {
            // The unescaped value has to be captured before expect1 advances
            // the parser past the content token.
15993 unescaped = parser->current_string;
15994 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
15995 content = parser->previous;
15996 }
15997
15998 // It is unfortunately possible to have multiple string content
15999 // nodes in a row in the case that there's heredoc content in
16000 // the middle of the string, like this cursed example:
16001 //
16002 // <<-END+'b
16003 // a
16004 // END
16005 // c'+'d'
16006 //
16007 // In that case we need to switch to an interpolated string to
16008 // be able to contain all of the parts.
16009 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16010 pm_node_list_t parts = { 0 };
16011
16012 pm_token_t delimiters = not_provided(parser);
16013 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
16014 pm_node_list_append(&parts, part);
16015
16016 do {
16017 part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
16018 pm_node_list_append(&parts, part);
16019 parser_lex(parser);
16020 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16021
16022 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16023 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16024
16025 pm_node_list_free(&parts);
16026 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16027 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16028 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16029 } else if (match1(parser, PM_TOKEN_EOF)) {
            // Unterminated literal: the error is reported on the opening
            // token and the EOF token stands in as the closing.
16030 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16031 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16032 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16033 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16034 } else {
            // Some other token follows the content. Report it, then turn the
            // previous token into a zero-width PM_TOKEN_MISSING token so it
            // can serve as the closing of the string node.
16035 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16036 parser->previous.start = parser->previous.end;
16037 parser->previous.type = PM_TOKEN_MISSING;
16038 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16039 }
16040 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16041 // In this case we've hit string content so we know the string
16042 // at least has something in it. We'll need to check if the
16043 // following token is the end (in which case we can return a
16044 // plain string) or if it's not then it has interpolation.
16045 pm_token_t content = parser->current;
16046 pm_string_t unescaped = parser->current_string;
16047 parser_lex(parser);
16048
16049 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16050 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16051 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16052
16053 // Kind of odd behavior, but basically if we have an
16054 // unterminated string and it ends in a newline, we back up one
16055 // character so that the error message is on the last line of
16056 // content in the string.
16057 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16058 const uint8_t *location = parser->previous.end;
16059 if (location > parser->start && location[-1] == '\n') location--;
16060 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16061
16062 parser->previous.start = parser->previous.end;
16063 parser->previous.type = PM_TOKEN_MISSING;
16064 }
16065 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16066 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16067 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16068 } else {
16069 // If we get here, then we have interpolation so we'll need
16070 // to create a string or symbol node with interpolation.
16071 pm_node_list_t parts = { 0 };
16072 pm_token_t string_opening = not_provided(parser);
16073 pm_token_t string_closing = not_provided(parser);
16074
16075 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped));
16076 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16077 pm_node_list_append(&parts, part);
16078
16079 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16080 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16081 pm_node_list_append(&parts, part);
16082 }
16083 }
16084
16085 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16086 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16087 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16088 } else if (match1(parser, PM_TOKEN_EOF)) {
                // Hit EOF before a terminator: report on the opening token
                // and use the EOF token as the closing.
16089 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16090 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16091 } else {
16092 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16093 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16094 }
16095
16096 pm_node_list_free(&parts);
16097 }
16098 } else {
16099 // If we get here, then the first part of the string is not plain
16100 // string content, in which case we need to parse the string as an
16101 // interpolated string.
16102 pm_node_list_t parts = { 0 };
16103 pm_node_t *part;
16104
16105 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16106 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16107 pm_node_list_append(&parts, part);
16108 }
16109 }
16110
16111 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16112 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16113 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16114 } else if (match1(parser, PM_TOKEN_EOF)) {
            // Hit EOF before a terminator: report on the opening token and
            // use the EOF token as the closing.
16115 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16116 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16117 } else {
16118 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16119 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16120 }
16121
16122 pm_node_list_free(&parts);
16123 }
16124
16125 if (current == NULL) {
16126 // If the node we just parsed is a symbol node, then we can't
16127 // concatenate it with anything else, so we can now return that
16128 // node.
16129 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16130 return node;
16131 }
16132
16133 // If we don't already have a node, then it's fine and we can just
16134 // set the result to be the node we just parsed.
16135 current = node;
16136 } else {
16137 // Otherwise we need to check the type of the node we just parsed.
16138 // If it cannot be concatenated with the previous node, then we'll
16139 // need to add a syntax error.
16140 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16141 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16142 }
16143
16144 // If we haven't already created our container for concatenation,
16145 // we'll do that now.
16146 if (!concating) {
16147 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16148 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16149 }
16150
16151 concating = true;
16152 pm_token_t bounds = not_provided(parser);
16153
16154 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16155 pm_interpolated_string_node_append(container, current);
16156 current = UP(container);
16157 }
16158
16159 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16160 }
16161 }
16162
16163 return current;
16164}
16165
// Values for the `flags` argument of parse_pattern. TOP and MULTI are distinct
// bits and callers in this file OR them together; SINGLE is the zero value
// (neither bit set).
// NOTE(review): the exact meaning of each bit is decided inside parse_pattern,
// which is not visible from here - confirm before documenting further.
16166#define PM_PARSE_PATTERN_SINGLE 0
16167#define PM_PARSE_PATTERN_TOP 1
16168#define PM_PARSE_PATTERN_MULTI 2
16169
// Forward declaration: the pattern helpers that follow call parse_pattern
// before its definition appears.
16170static pm_node_t *
16171parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16172
16178static void
16179parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16180 // Skip this capture if it starts with an underscore.
16181 if (peek_at(parser, location->start) == '_') return;
16182
16183 if (pm_constant_id_list_includes(captures, capture)) {
16184 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16185 } else {
16186 pm_constant_id_list_append(captures, capture);
16187 }
16188}
16189
16193static pm_node_t *
16194parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16195 // Now, if there are any :: operators that follow, parse them as constant
16196 // path nodes.
16197 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16198 pm_token_t delimiter = parser->previous;
16199 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16200 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16201 }
16202
16203 // If there is a [ or ( that follows, then this is part of a larger pattern
16204 // expression. We'll parse the inner pattern here, then modify the returned
16205 // inner pattern with our constant path attached.
16206 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16207 return node;
16208 }
16209
16210 pm_token_t opening;
16211 pm_token_t closing;
16212 pm_node_t *inner = NULL;
16213
16214 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16215 opening = parser->previous;
16216 accept1(parser, PM_TOKEN_NEWLINE);
16217
16218 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16219 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16220 accept1(parser, PM_TOKEN_NEWLINE);
16221 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16222 }
16223
16224 closing = parser->previous;
16225 } else {
16226 parser_lex(parser);
16227 opening = parser->previous;
16228 accept1(parser, PM_TOKEN_NEWLINE);
16229
16230 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16231 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16232 accept1(parser, PM_TOKEN_NEWLINE);
16233 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16234 }
16235
16236 closing = parser->previous;
16237 }
16238
16239 if (!inner) {
16240 // If there was no inner pattern, then we have something like Foo() or
16241 // Foo[]. In that case we'll create an array pattern with no requireds.
16242 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16243 }
16244
16245 // Now that we have the inner pattern, check to see if it's an array, find,
16246 // or hash pattern. If it is, then we'll attach our constant path to it if
16247 // it doesn't already have a constant. If it's not one of those node types
16248 // or it does have a constant, then we'll create an array pattern.
16249 switch (PM_NODE_TYPE(inner)) {
16250 case PM_ARRAY_PATTERN_NODE: {
16251 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16252
16253 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16254 pattern_node->base.location.start = node->location.start;
16255 pattern_node->base.location.end = closing.end;
16256
16257 pattern_node->constant = node;
16258 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16259 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16260
16261 return UP(pattern_node);
16262 }
16263
16264 break;
16265 }
16266 case PM_FIND_PATTERN_NODE: {
16267 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16268
16269 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16270 pattern_node->base.location.start = node->location.start;
16271 pattern_node->base.location.end = closing.end;
16272
16273 pattern_node->constant = node;
16274 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16275 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16276
16277 return UP(pattern_node);
16278 }
16279
16280 break;
16281 }
16282 case PM_HASH_PATTERN_NODE: {
16283 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16284
16285 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16286 pattern_node->base.location.start = node->location.start;
16287 pattern_node->base.location.end = closing.end;
16288
16289 pattern_node->constant = node;
16290 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16291 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16292
16293 return UP(pattern_node);
16294 }
16295
16296 break;
16297 }
16298 default:
16299 break;
16300 }
16301
16302 // If we got here, then we didn't return one of the inner patterns by
16303 // attaching its constant. In this case we'll create an array pattern and
16304 // attach our constant to it.
16305 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16306 pm_array_pattern_node_requireds_append(pattern_node, inner);
16307 return UP(pattern_node);
16308}
16309
16313static pm_splat_node_t *
16314parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16315 assert(parser->previous.type == PM_TOKEN_USTAR);
16316 pm_token_t operator = parser->previous;
16317 pm_node_t *name = NULL;
16318
16319 // Rest patterns don't necessarily have a name associated with them. So we
16320 // will check for that here. If they do, then we'll add it to the local
16321 // table since this pattern will cause it to become a local variable.
16322 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16323 pm_token_t identifier = parser->previous;
16324 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16325
16326 int depth;
16327 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16328 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16329 }
16330
16331 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16332 name = UP(pm_local_variable_target_node_create(
16333 parser,
16334 &PM_LOCATION_TOKEN_VALUE(&identifier),
16335 constant_id,
16336 (uint32_t) (depth == -1 ? 0 : depth)
16337 ));
16338 }
16339
16340 // Finally we can return the created node.
16341 return pm_splat_node_create(parser, &operator, name);
16342}
16343
16347static pm_node_t *
16348parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16349 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16350 parser_lex(parser);
16351
16352 pm_token_t operator = parser->previous;
16353 pm_node_t *value = NULL;
16354
16355 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16356 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16357 }
16358
16359 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16360 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16361
16362 int depth;
16363 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16364 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16365 }
16366
16367 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16368 value = UP(pm_local_variable_target_node_create(
16369 parser,
16370 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16371 constant_id,
16372 (uint32_t) (depth == -1 ? 0 : depth)
16373 ));
16374 }
16375
16376 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16377}
16378
16383static bool
16384pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16385 ptrdiff_t length = end - start;
16386 if (length == 0) return false;
16387
16388 // First ensure that it starts with a valid identifier starting character.
16389 size_t width = char_is_identifier_start(parser, start, end - start);
16390 if (width == 0) return false;
16391
16392 // Next, ensure that it's not an uppercase character.
16393 if (parser->encoding_changed) {
16394 if (parser->encoding->isupper_char(start, length)) return false;
16395 } else {
16396 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16397 }
16398
16399 // Next, iterate through all of the bytes of the string to ensure that they
16400 // are all valid identifier characters.
16401 const uint8_t *cursor = start + width;
16402 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16403 return cursor == end;
16404}
16405
16410static pm_node_t *
16411parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16412 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16413
16414 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16415 int depth = -1;
16416
16417 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16418 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16419 } else {
16420 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16421
16422 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16423 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16424 }
16425 }
16426
16427 if (depth == -1) {
16428 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
16429 }
16430
16431 parse_pattern_capture(parser, captures, constant_id, value_loc);
16432 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16433 parser,
16434 value_loc,
16435 constant_id,
16436 (uint32_t) (depth == -1 ? 0 : depth)
16437 );
16438
16439 return UP(pm_implicit_node_create(parser, UP(target)));
16440}
16441
16446static void
16447parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16448 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16449 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16450 }
16451}
16452
16457parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16458 pm_node_list_t assocs = { 0 };
16459 pm_static_literals_t keys = { 0 };
16460 pm_node_t *rest = NULL;
16461
16462 switch (PM_NODE_TYPE(first_node)) {
16463 case PM_ASSOC_SPLAT_NODE:
16464 case PM_NO_KEYWORDS_PARAMETER_NODE:
16465 rest = first_node;
16466 break;
16467 case PM_SYMBOL_NODE: {
16468 if (pm_symbol_node_label_p(first_node)) {
16469 parse_pattern_hash_key(parser, &keys, first_node);
16470 pm_node_t *value;
16471
16472 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16473 // Otherwise, we will create an implicit local variable
16474 // target for the value.
16475 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16476 } else {
16477 // Here we have a value for the first assoc in the list, so
16478 // we will parse it now.
16479 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16480 }
16481
16482 pm_token_t operator = not_provided(parser);
16483 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16484
16485 pm_node_list_append(&assocs, assoc);
16486 break;
16487 }
16488 }
16490 default: {
16491 // If we get anything else, then this is an error. For this we'll
16492 // create a missing node for the value and create an assoc node for
16493 // the first node in the list.
16494 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16495 pm_parser_err_node(parser, first_node, diag_id);
16496
16497 pm_token_t operator = not_provided(parser);
16498 pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
16499 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16500
16501 pm_node_list_append(&assocs, assoc);
16502 break;
16503 }
16504 }
16505
16506 // If there are any other assocs, then we'll parse them now.
16507 while (accept1(parser, PM_TOKEN_COMMA)) {
16508 // Here we need to break to support trailing commas.
16509 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16510 // Trailing commas are not allowed to follow a rest pattern.
16511 if (rest != NULL) {
16512 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16513 }
16514
16515 break;
16516 }
16517
16518 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16519 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16520
16521 if (rest == NULL) {
16522 rest = assoc;
16523 } else {
16524 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16525 pm_node_list_append(&assocs, assoc);
16526 }
16527 } else {
16528 pm_node_t *key;
16529
16530 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16531 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16532
16533 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16534 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16535 } else if (!pm_symbol_node_label_p(key)) {
16536 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16537 }
16538 } else {
16539 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16540 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16541 }
16542
16543 parse_pattern_hash_key(parser, &keys, key);
16544 pm_node_t *value = NULL;
16545
16546 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16547 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16548 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16549 } else {
16550 value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
16551 }
16552 } else {
16553 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16554 }
16555
16556 pm_token_t operator = not_provided(parser);
16557 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
16558
16559 if (rest != NULL) {
16560 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16561 }
16562
16563 pm_node_list_append(&assocs, assoc);
16564 }
16565 }
16566
16567 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16568 xfree(assocs.nodes);
16569
16570 pm_static_literals_free(&keys);
16571 return node;
16572}
16573
16577static pm_node_t *
16578parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16579 switch (parser->current.type) {
16580 case PM_TOKEN_IDENTIFIER:
16581 case PM_TOKEN_METHOD_NAME: {
16582 parser_lex(parser);
16583 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16584
16585 int depth;
16586 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16587 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16588 }
16589
16590 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16591 return UP(pm_local_variable_target_node_create(
16592 parser,
16593 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16594 constant_id,
16595 (uint32_t) (depth == -1 ? 0 : depth)
16596 ));
16597 }
16598 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16599 pm_token_t opening = parser->current;
16600 parser_lex(parser);
16601
16602 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16603 // If we have an empty array pattern, then we'll just return a new
16604 // array pattern node.
16605 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16606 }
16607
16608 // Otherwise, we'll parse the inner pattern, then deal with it depending
16609 // on the type it returns.
16610 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16611
16612 accept1(parser, PM_TOKEN_NEWLINE);
16613 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16614 pm_token_t closing = parser->previous;
16615
16616 switch (PM_NODE_TYPE(inner)) {
16617 case PM_ARRAY_PATTERN_NODE: {
16618 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16619 if (pattern_node->opening_loc.start == NULL) {
16620 pattern_node->base.location.start = opening.start;
16621 pattern_node->base.location.end = closing.end;
16622
16623 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16624 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16625
16626 return UP(pattern_node);
16627 }
16628
16629 break;
16630 }
16631 case PM_FIND_PATTERN_NODE: {
16632 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16633 if (pattern_node->opening_loc.start == NULL) {
16634 pattern_node->base.location.start = opening.start;
16635 pattern_node->base.location.end = closing.end;
16636
16637 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16638 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16639
16640 return UP(pattern_node);
16641 }
16642
16643 break;
16644 }
16645 default:
16646 break;
16647 }
16648
16649 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16650 pm_array_pattern_node_requireds_append(node, inner);
16651 return UP(node);
16652 }
16653 case PM_TOKEN_BRACE_LEFT: {
16654 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16655 parser->pattern_matching_newlines = false;
16656
16658 pm_token_t opening = parser->current;
16659 parser_lex(parser);
16660
16661 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16662 // If we have an empty hash pattern, then we'll just return a new hash
16663 // pattern node.
16664 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16665 } else {
16666 pm_node_t *first_node;
16667
16668 switch (parser->current.type) {
16669 case PM_TOKEN_LABEL:
16670 parser_lex(parser);
16671 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16672 break;
16673 case PM_TOKEN_USTAR_STAR:
16674 first_node = parse_pattern_keyword_rest(parser, captures);
16675 break;
16676 case PM_TOKEN_STRING_BEGIN:
16677 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16678 break;
16679 default: {
16680 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16681 parser_lex(parser);
16682
16683 first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
16684 break;
16685 }
16686 }
16687
16688 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16689
16690 accept1(parser, PM_TOKEN_NEWLINE);
16691 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16692 pm_token_t closing = parser->previous;
16693
16694 node->base.location.start = opening.start;
16695 node->base.location.end = closing.end;
16696
16697 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16698 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16699 }
16700
16701 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16702 return UP(node);
16703 }
16704 case PM_TOKEN_UDOT_DOT:
16705 case PM_TOKEN_UDOT_DOT_DOT: {
16706 pm_token_t operator = parser->current;
16707 parser_lex(parser);
16708
16709 // Since we have a unary range operator, we need to parse the subsequent
16710 // expression as the right side of the range.
16711 switch (parser->current.type) {
16712 case PM_CASE_PRIMITIVE: {
16713 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16714 return UP(pm_range_node_create(parser, NULL, &operator, right));
16715 }
16716 default: {
16717 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16718 pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
16719 return UP(pm_range_node_create(parser, NULL, &operator, right));
16720 }
16721 }
16722 }
16723 case PM_CASE_PRIMITIVE: {
16724 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16725
16726 // If we found a label, we need to immediately return to the caller.
16727 if (pm_symbol_node_label_p(node)) return node;
16728
16729 // Call nodes (arithmetic operations) are not allowed in patterns
16730 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16731 pm_parser_err_node(parser, node, diag_id);
16732 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
16733
16734 pm_node_unreference(parser, node);
16735 pm_node_destroy(parser, node);
16736 return UP(missing_node);
16737 }
16738
16739 // Now that we have a primitive, we need to check if it's part of a range.
16740 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16741 pm_token_t operator = parser->previous;
16742
16743 // Now that we have the operator, we need to check if this is followed
16744 // by another expression. If it is, then we will create a full range
16745 // node. Otherwise, we'll create an endless range.
16746 switch (parser->current.type) {
16747 case PM_CASE_PRIMITIVE: {
16748 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16749 return UP(pm_range_node_create(parser, node, &operator, right));
16750 }
16751 default:
16752 return UP(pm_range_node_create(parser, node, &operator, NULL));
16753 }
16754 }
16755
16756 return node;
16757 }
16758 case PM_TOKEN_CARET: {
16759 parser_lex(parser);
16760 pm_token_t operator = parser->previous;
16761
16762 // At this point we have a pin operator. We need to check the subsequent
16763 // expression to determine if it's a variable or an expression.
16764 switch (parser->current.type) {
16765 case PM_TOKEN_IDENTIFIER: {
16766 parser_lex(parser);
16767 pm_node_t *variable = UP(parse_variable(parser));
16768
16769 if (variable == NULL) {
16770 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16771 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16772 }
16773
16774 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16775 }
16776 case PM_TOKEN_INSTANCE_VARIABLE: {
16777 parser_lex(parser);
16778 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16779
16780 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16781 }
16782 case PM_TOKEN_CLASS_VARIABLE: {
16783 parser_lex(parser);
16784 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16785
16786 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16787 }
16788 case PM_TOKEN_GLOBAL_VARIABLE: {
16789 parser_lex(parser);
16790 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16791
16792 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16793 }
16794 case PM_TOKEN_NUMBERED_REFERENCE: {
16795 parser_lex(parser);
16796 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16797
16798 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16799 }
16800 case PM_TOKEN_BACK_REFERENCE: {
16801 parser_lex(parser);
16802 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16803
16804 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16805 }
16806 case PM_TOKEN_PARENTHESIS_LEFT: {
16807 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16808 parser->pattern_matching_newlines = false;
16809
16810 pm_token_t lparen = parser->current;
16811 parser_lex(parser);
16812
16813 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16814 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16815
16816 accept1(parser, PM_TOKEN_NEWLINE);
16817 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16818 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16819 }
16820 default: {
16821 // If we get here, then we have a pin operator followed by something
16822 // not understood. We'll create a missing node and return that.
16823 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16824 pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
16825 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16826 }
16827 }
16828 }
16829 case PM_TOKEN_UCOLON_COLON: {
16830 pm_token_t delimiter = parser->current;
16831 parser_lex(parser);
16832
16833 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16834 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16835
16836 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16837 }
16838 case PM_TOKEN_CONSTANT: {
16839 pm_token_t constant = parser->current;
16840 parser_lex(parser);
16841
16842 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16843 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16844 }
16845 default:
16846 pm_parser_err_current(parser, diag_id);
16847 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16848 }
16849}
16850
16851static bool
16852parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16853 switch (PM_NODE_TYPE(node)) {
16854 case PM_LOCAL_VARIABLE_TARGET_NODE:
16855 pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16856 return false;
16857 default:
16858 return true;
16859 }
16860}
16861
16866static void
16867parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16868 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16869}
16870
16875static pm_node_t *
16876parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16877 pm_node_t *node = first_node;
16878 bool alternation = false;
16879
16880 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16881 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16882 parse_pattern_alternation_error(parser, node);
16883 }
16884
16885 switch (parser->current.type) {
16886 case PM_TOKEN_IDENTIFIER:
16887 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16888 case PM_TOKEN_BRACE_LEFT:
16889 case PM_TOKEN_CARET:
16890 case PM_TOKEN_CONSTANT:
16891 case PM_TOKEN_UCOLON_COLON:
16892 case PM_TOKEN_UDOT_DOT:
16893 case PM_TOKEN_UDOT_DOT_DOT:
16894 case PM_CASE_PRIMITIVE: {
16895 if (!alternation) {
16896 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16897 } else {
16898 pm_token_t operator = parser->previous;
16899 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16900
16901 if (captures->size) parse_pattern_alternation_error(parser, right);
16902 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16903 }
16904
16905 break;
16906 }
16907 case PM_TOKEN_PARENTHESIS_LEFT:
16908 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16909 pm_token_t operator = parser->previous;
16910 pm_token_t opening = parser->current;
16911 parser_lex(parser);
16912
16913 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16914 accept1(parser, PM_TOKEN_NEWLINE);
16915 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16916 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16917
16918 if (!alternation) {
16919 node = right;
16920 } else {
16921 if (captures->size) parse_pattern_alternation_error(parser, right);
16922 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16923 }
16924
16925 break;
16926 }
16927 default: {
16928 pm_parser_err_current(parser, diag_id);
16929 pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16930
16931 if (!alternation) {
16932 node = right;
16933 } else {
16934 if (captures->size) parse_pattern_alternation_error(parser, right);
16935 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16936 }
16937
16938 break;
16939 }
16940 }
16941 }
16942
16943 // If we have an =>, then we are assigning this pattern to a variable.
16944 // In this case we should create an assignment node.
16945 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16946 pm_token_t operator = parser->previous;
16947 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16948
16949 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16950 int depth;
16951
16952 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16953 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16954 }
16955
16956 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16957 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16958 parser,
16959 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16960 constant_id,
16961 (uint32_t) (depth == -1 ? 0 : depth)
16962 );
16963
16964 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16965 }
16966
16967 return node;
16968}
16969
16973static pm_node_t *
16974parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16975 pm_node_t *node = NULL;
16976
16977 bool leading_rest = false;
16978 bool trailing_rest = false;
16979
16980 switch (parser->current.type) {
16981 case PM_TOKEN_LABEL: {
16982 parser_lex(parser);
16983 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16984 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16985
16986 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16987 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16988 }
16989
16990 return node;
16991 }
16992 case PM_TOKEN_USTAR_STAR: {
16993 node = parse_pattern_keyword_rest(parser, captures);
16994 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16995
16996 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16997 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16998 }
16999
17000 return node;
17001 }
17002 case PM_TOKEN_STRING_BEGIN: {
17003 // We need special handling for string beginnings because they could
17004 // be dynamic symbols leading to hash patterns.
17005 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17006
17007 if (pm_symbol_node_label_p(node)) {
17008 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17009
17010 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17011 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17012 }
17013
17014 return node;
17015 }
17016
17017 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17018 break;
17019 }
17020 case PM_TOKEN_USTAR: {
17021 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17022 parser_lex(parser);
17023 node = UP(parse_pattern_rest(parser, captures));
17024 leading_rest = true;
17025 break;
17026 }
17027 }
17029 default:
17030 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17031 break;
17032 }
17033
17034 // If we got a dynamic label symbol, then we need to treat it like the
17035 // beginning of a hash pattern.
17036 if (pm_symbol_node_label_p(node)) {
17037 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17038 }
17039
17040 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17041 // If we have a comma, then we are now parsing either an array pattern
17042 // or a find pattern. We need to parse all of the patterns, put them
17043 // into a big list, and then determine which type of node we have.
17044 pm_node_list_t nodes = { 0 };
17045 pm_node_list_append(&nodes, node);
17046
17047 // Gather up all of the patterns into the list.
17048 while (accept1(parser, PM_TOKEN_COMMA)) {
17049 // Break early here in case we have a trailing comma.
17050 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17051 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17052 pm_node_list_append(&nodes, node);
17053 trailing_rest = true;
17054 break;
17055 }
17056
17057 if (accept1(parser, PM_TOKEN_USTAR)) {
17058 node = UP(parse_pattern_rest(parser, captures));
17059
17060 // If we have already parsed a splat pattern, then this is an
17061 // error. We will continue to parse the rest of the patterns,
17062 // but we will indicate it as an error.
17063 if (trailing_rest) {
17064 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17065 }
17066
17067 trailing_rest = true;
17068 } else {
17069 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17070 }
17071
17072 pm_node_list_append(&nodes, node);
17073 }
17074
17075 // If the first pattern and the last pattern are rest patterns, then we
17076 // will call this a find pattern, regardless of how many rest patterns
17077 // are in between because we know we already added the appropriate
17078 // errors. Otherwise we will create an array pattern.
17079 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17080 node = UP(pm_find_pattern_node_create(parser, &nodes));
17081
17082 if (nodes.size == 2) {
17083 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17084 }
17085 } else {
17086 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17087
17088 if (leading_rest && trailing_rest) {
17089 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17090 }
17091 }
17092
17093 xfree(nodes.nodes);
17094 } else if (leading_rest) {
17095 // Otherwise, if we parsed a single splat pattern, then we know we have
17096 // an array pattern, so we can go ahead and create that node.
17097 node = UP(pm_array_pattern_node_rest_create(parser, node));
17098 }
17099
17100 return node;
17101}
17102
17108static inline void
17109parse_negative_numeric(pm_node_t *node) {
17110 switch (PM_NODE_TYPE(node)) {
17111 case PM_INTEGER_NODE: {
17112 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17113 cast->base.location.start--;
17114 cast->value.negative = true;
17115 break;
17116 }
17117 case PM_FLOAT_NODE: {
17118 pm_float_node_t *cast = (pm_float_node_t *) node;
17119 cast->base.location.start--;
17120 cast->value = -cast->value;
17121 break;
17122 }
17123 case PM_RATIONAL_NODE: {
17124 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17125 cast->base.location.start--;
17126 cast->numerator.negative = true;
17127 break;
17128 }
17129 case PM_IMAGINARY_NODE:
17130 node->location.start--;
17131 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17132 break;
17133 default:
17134 assert(false && "unreachable");
17135 break;
17136 }
17137}
17138
17144static void
17145pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17146 switch (diag_id) {
17147 case PM_ERR_HASH_KEY: {
17148 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17149 break;
17150 }
17151 case PM_ERR_HASH_VALUE:
17152 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17153 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17154 break;
17155 }
17156 case PM_ERR_UNARY_RECEIVER: {
17157 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17158 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17159 break;
17160 }
17161 case PM_ERR_UNARY_DISALLOWED:
17162 case PM_ERR_EXPECT_ARGUMENT: {
17163 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17164 break;
17165 }
17166 default:
17167 pm_parser_err_previous(parser, diag_id);
17168 break;
17169 }
17170}
17171
/**
 * Check that a retry keyword (the given node) appears somewhere it is allowed.
 *
 * Walks the context stack outward from parser->current_context. Reaching a
 * context that permits retry returns without an error. Reaching a context that
 * forbids it adds one of PM_ERR_INVALID_RETRY_WITHOUT_RESCUE,
 * PM_ERR_INVALID_RETRY_AFTER_ENSURE or PM_ERR_INVALID_RETRY_AFTER_ELSE,
 * depending on what `context` recorded on the way up, and returns. All other
 * contexts are skipped. If the stack is exhausted nothing is reported.
 *
 * NOTE(review): the line numbers embedded in this listing skip in several
 * places inside the switch (for example 17185 -> 17193, 17213 -> 17221 and
 * 17224 -> 17232), so some `case` labels are not visible here. In particular,
 * the labels that lead to the two `context = ...` assignments are missing from
 * this view. The lines below are reproduced as found.
 */
17175static void
17176parse_retry(pm_parser_t *parser, const pm_node_t *node) {
// Function-local constants recording what has been passed on the way up.
17177#define CONTEXT_NONE 0
17178#define CONTEXT_THROUGH_ENSURE 1
17179#define CONTEXT_THROUGH_ELSE 2
17180
17181 pm_context_node_t *context_node = parser->current_context;
17182 int context = CONTEXT_NONE;
17183
17184 while (context_node != NULL) {
17185 switch (context_node->context) {
17193 case PM_CONTEXT_DEFINED:
17195 // These are the good cases. We're allowed to have a retry here.
17196 return;
17197 case PM_CONTEXT_CLASS:
17198 case PM_CONTEXT_DEF:
17200 case PM_CONTEXT_MAIN:
17201 case PM_CONTEXT_MODULE:
17202 case PM_CONTEXT_PREEXE:
17203 case PM_CONTEXT_SCLASS:
17204 // These are the bad cases. We're not allowed to have a retry in
17205 // these contexts.
17206 if (context == CONTEXT_NONE) {
17207 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17208 } else if (context == CONTEXT_THROUGH_ENSURE) {
17209 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17210 } else if (context == CONTEXT_THROUGH_ELSE) {
17211 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17212 }
17213 return;
17221 // These are also bad cases, but with a more specific error
17222 // message indicating the else.
17223 context = CONTEXT_THROUGH_ELSE;
17224 break;
17232 // These are also bad cases, but with a more specific error
17233 // message indicating the ensure.
17234 context = CONTEXT_THROUGH_ENSURE;
17235 break;
17236 case PM_CONTEXT_NONE:
17237 // This case should never happen.
17238 assert(false && "unreachable");
17239 break;
17240 case PM_CONTEXT_BEGIN:
17244 case PM_CONTEXT_CASE_IN:
17247 case PM_CONTEXT_ELSE:
17248 case PM_CONTEXT_ELSIF:
17249 case PM_CONTEXT_EMBEXPR:
17251 case PM_CONTEXT_FOR:
17252 case PM_CONTEXT_IF:
17257 case PM_CONTEXT_PARENS:
17258 case PM_CONTEXT_POSTEXE:
17260 case PM_CONTEXT_TERNARY:
17261 case PM_CONTEXT_UNLESS:
17262 case PM_CONTEXT_UNTIL:
17263 case PM_CONTEXT_WHILE:
17264 // In these contexts we should continue walking up the list of
17265 // contexts.
17266 break;
17267 }
17268
17269 context_node = context_node->prev;
17270 }
17271
// Undefine exactly the names defined at the top of the function so that none
// of them stays defined for the rest of the translation unit.
17272#undef CONTEXT_NONE
17273#undef CONTEXT_THROUGH_ENSURE
17274#undef CONTEXT_THROUGH_ELSE
17275}
17276
/**
 * Check that a yield keyword (the given node) appears somewhere it is allowed.
 *
 * Walks the context stack outward from parser->current_context. Reaching a
 * context that permits yield returns without an error. Reaching a context that
 * forbids it adds PM_ERR_INVALID_YIELD on the node and returns. All other
 * contexts are skipped. If the stack is exhausted nothing is reported.
 *
 * NOTE(review): the line numbers embedded in this listing skip in several
 * places inside the switch (for example 17286 -> 17288 and 17288 -> 17292), so
 * some `case` labels are not visible here. The lists below may be incomplete.
 */
17280static void
17281parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17282 pm_context_node_t *context_node = parser->current_context;
17283
17284 while (context_node != NULL) {
17285 switch (context_node->context) {
17286 case PM_CONTEXT_DEF:
17288 case PM_CONTEXT_DEFINED:
17292 // These are the good cases. We're allowed to have a yield in
17293 // these contexts.
17294 return;
17295 case PM_CONTEXT_CLASS:
17299 case PM_CONTEXT_MAIN:
17300 case PM_CONTEXT_MODULE:
17304 case PM_CONTEXT_SCLASS:
17308 // These are the bad cases. We're not allowed to have a yield in
17309 // these contexts.
17310 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17311 return;
17312 case PM_CONTEXT_NONE:
17313 // This case should never happen.
17314 assert(false && "unreachable");
17315 break;
17316 case PM_CONTEXT_BEGIN:
17326 case PM_CONTEXT_CASE_IN:
17329 case PM_CONTEXT_ELSE:
17330 case PM_CONTEXT_ELSIF:
17331 case PM_CONTEXT_EMBEXPR:
17333 case PM_CONTEXT_FOR:
17334 case PM_CONTEXT_IF:
17342 case PM_CONTEXT_PARENS:
17343 case PM_CONTEXT_POSTEXE:
17345 case PM_CONTEXT_PREEXE:
17347 case PM_CONTEXT_TERNARY:
17348 case PM_CONTEXT_UNLESS:
17349 case PM_CONTEXT_UNTIL:
17350 case PM_CONTEXT_WHILE:
17351 // In these contexts we should continue walking up the list of
17352 // contexts.
17353 break;
17354 }
17355
// Move outward to the enclosing context.
17356 context_node = context_node->prev;
17357 }
17358}
17359
/**
 * NOTE(review): this struct is only partially visible. The embedded line
 * numbers skip (17364 -> 17367 -> 17369 ...) and neither the closing brace nor
 * the typedef name appears in this view. Presumably this is the callback data
 * used when reporting regular expression parse errors; the code that follows
 * also reads `parser` and `shared` members that are not shown here -- confirm
 * against the full file.
 */
17364typedef struct {
17367
// Start of a fallback source range (initialized from a node's location.start
// further down in this file).
17369 const uint8_t *start;
17370
// End of that fallback source range (initialized from a node's location.end).
17372 const uint8_t *end;
17373
17382
/**
 * Error callback for regular expression parsing. Adds a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic, formatted with the given message, to
 * the parser stored in the callback data.
 *
 * The reported range is [start, end) when the callback data's `shared` flag is
 * set, and the range stored in the callback data otherwise.
 *
 * NOTE(review): the line that declares `callback_data` (embedded line 17389)
 * is missing from this view; presumably it is `data` cast to the callback data
 * struct -- confirm against the full file.
 */
17387static void
17388parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17390 pm_location_t location;
17391
17392 if (callback_data->shared) {
// Use the range handed to this callback.
17393 location = (pm_location_t) { .start = start, .end = end };
17394 } else {
// Fall back to the range stored in the callback data.
17395 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17396 }
17397
17398 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17399}
17400
/**
 * Run pm_regexp_parse over the unescaped source of a regular expression node
 * so that parse errors are reported through parse_regular_expression_error.
 *
 * The callback data carries the parser, the node's own location as a fallback
 * range, and whether the unescaped string has type PM_STRING_SHARED. The
 * node's PM_REGULAR_EXPRESSION_FLAGS_EXTENDED flag is forwarded to the regexp
 * parser, and two of its arguments are passed as NULL.
 *
 * NOTE(review): the line that opens the `error_data` initializer (embedded
 * line 17407) is missing from this view.
 */
17404static void
17405parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17406 const pm_string_t *unescaped = &node->unescaped;
17408 .parser = parser,
17409 .start = node->base.location.start,
17410 .end = node->base.location.end,
17411 .shared = unescaped->type == PM_STRING_SHARED
17412 };
17413
17414 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17415}
17416
17420static inline pm_node_t *
17421parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17422 switch (parser->current.type) {
17423 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17424 parser_lex(parser);
17425
17426 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17427 pm_accepts_block_stack_push(parser, true);
17428 bool parsed_bare_hash = false;
17429
17430 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17431 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17432
17433 // Handle the case where we don't have a comma and we have a
17434 // newline followed by a right bracket.
17435 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17436 break;
17437 }
17438
17439 // Ensure that we have a comma between elements in the array.
17440 if (array->elements.size > 0) {
17441 if (accept1(parser, PM_TOKEN_COMMA)) {
17442 // If there was a comma but we also accepts a newline,
17443 // then this is a syntax error.
17444 if (accepted_newline) {
17445 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17446 }
17447 } else {
17448 // If there was no comma, then we need to add a syntax
17449 // error.
17450 const uint8_t *location = parser->previous.end;
17451 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17452
17453 parser->previous.start = location;
17454 parser->previous.type = PM_TOKEN_MISSING;
17455 }
17456 }
17457
17458 // If we have a right bracket immediately following a comma,
17459 // this is allowed since it's a trailing comma. In this case we
17460 // can break out of the loop.
17461 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17462
17463 pm_node_t *element;
17464
17465 if (accept1(parser, PM_TOKEN_USTAR)) {
17466 pm_token_t operator = parser->previous;
17467 pm_node_t *expression = NULL;
17468
17469 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17470 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17471 } else {
17472 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17473 }
17474
17475 element = UP(pm_splat_node_create(parser, &operator, expression));
17476 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17477 if (parsed_bare_hash) {
17478 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17479 }
17480
17481 element = UP(pm_keyword_hash_node_create(parser));
17482 pm_static_literals_t hash_keys = { 0 };
17483
17484 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17485 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17486 }
17487
17488 pm_static_literals_free(&hash_keys);
17489 parsed_bare_hash = true;
17490 } else {
17491 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17492
17493 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17494 if (parsed_bare_hash) {
17495 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17496 }
17497
17498 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17499 pm_static_literals_t hash_keys = { 0 };
17500 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17501
17502 pm_token_t operator;
17503 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17504 operator = parser->previous;
17505 } else {
17506 operator = not_provided(parser);
17507 }
17508
17509 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17510 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
17511 pm_keyword_hash_node_elements_append(hash, assoc);
17512
17513 element = UP(hash);
17514 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17515 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17516 }
17517
17518 pm_static_literals_free(&hash_keys);
17519 parsed_bare_hash = true;
17520 }
17521 }
17522
17523 pm_array_node_elements_append(array, element);
17524 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17525 }
17526
17527 accept1(parser, PM_TOKEN_NEWLINE);
17528
17529 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17530 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17531 parser->previous.start = parser->previous.end;
17532 parser->previous.type = PM_TOKEN_MISSING;
17533 }
17534
17535 pm_array_node_close_set(array, &parser->previous);
17536 pm_accepts_block_stack_pop(parser);
17537
17538 return UP(array);
17539 }
17540 case PM_TOKEN_PARENTHESIS_LEFT:
17541 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17542 pm_token_t opening = parser->current;
17543 pm_node_flags_t flags = 0;
17544
17545 pm_node_list_t current_block_exits = { 0 };
17546 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17547
17548 parser_lex(parser);
17549 while (true) {
17550 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17551 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17552 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17553 break;
17554 }
17555 }
17556
17557 // If this is the end of the file or we match a right parenthesis, then
17558 // we have an empty parentheses node, and we can immediately return.
17559 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17560 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17561
17562 pop_block_exits(parser, previous_block_exits);
17563 pm_node_list_free(&current_block_exits);
17564
17565 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17566 }
17567
17568 // Otherwise, we're going to parse the first statement in the list
17569 // of statements within the parentheses.
17570 pm_accepts_block_stack_push(parser, true);
17571 context_push(parser, PM_CONTEXT_PARENS);
17572 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17573 context_pop(parser);
17574
17575 // Determine if this statement is followed by a terminator. In the
17576 // case of a single statement, this is fine. But in the case of
17577 // multiple statements it's required.
17578 bool terminator_found = false;
17579
17580 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17581 terminator_found = true;
17582 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17583 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17584 terminator_found = true;
17585 }
17586
17587 if (terminator_found) {
17588 while (true) {
17589 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17590 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17591 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17592 break;
17593 }
17594 }
17595 }
17596
17597 // If we hit a right parenthesis, then we're done parsing the
17598 // parentheses node, and we can check which kind of node we should
17599 // return.
17600 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17601 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17602 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17603 }
17604
17605 parser_lex(parser);
17606 pm_accepts_block_stack_pop(parser);
17607
17608 pop_block_exits(parser, previous_block_exits);
17609 pm_node_list_free(&current_block_exits);
17610
17611 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17612 // If we have a single statement and are ending on a right
17613 // parenthesis, then we need to check if this is possibly a
17614 // multiple target node.
17615 pm_multi_target_node_t *multi_target;
17616
17617 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
17618 multi_target = (pm_multi_target_node_t *) statement;
17619 } else {
17620 multi_target = pm_multi_target_node_create(parser);
17621 pm_multi_target_node_targets_append(parser, multi_target, statement);
17622 }
17623
17624 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17625 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
17626
17627 multi_target->lparen_loc = lparen_loc;
17628 multi_target->rparen_loc = rparen_loc;
17629 multi_target->base.location.start = lparen_loc.start;
17630 multi_target->base.location.end = rparen_loc.end;
17631
17632 pm_node_t *result;
17633 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17634 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17635 accept1(parser, PM_TOKEN_NEWLINE);
17636 } else {
17637 result = UP(multi_target);
17638 }
17639
17640 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17641 // All set, this is explicitly allowed by the parent
17642 // context.
17643 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17644 // All set, we're inside a for loop and we're parsing
17645 // multiple targets.
17646 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17647 // Multi targets are not allowed when it's not a
17648 // statement level.
17649 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17650 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17651 // Multi targets must be followed by an equal sign in
17652 // order to be valid (or a right parenthesis if they are
17653 // nested).
17654 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17655 }
17656
17657 return result;
17658 }
17659
17660 // If we have a single statement and are ending on a right parenthesis
17661 // and we didn't return a multiple assignment node, then we can return a
17662 // regular parentheses node now.
17663 pm_statements_node_t *statements = pm_statements_node_create(parser);
17664 pm_statements_node_body_append(parser, statements, statement, true);
17665
17666 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17667 }
17668
17669 // If we have more than one statement in the set of parentheses,
17670 // then we are going to parse all of them as a list of statements.
17671 // We'll do that here.
17672 context_push(parser, PM_CONTEXT_PARENS);
17673 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17674
17675 pm_statements_node_t *statements = pm_statements_node_create(parser);
17676 pm_statements_node_body_append(parser, statements, statement, true);
17677
17678 // If we didn't find a terminator and we didn't find a right
17679 // parenthesis, then this is a syntax error.
17680 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17681 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17682 }
17683
17684 // Parse each statement within the parentheses.
17685 while (true) {
17686 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17687 pm_statements_node_body_append(parser, statements, node, true);
17688
17689 // If we're recovering from a syntax error, then we need to stop
17690 // parsing the statements now.
17691 if (parser->recovering) {
17692 // If this is the level of context where the recovery has
17693 // happened, then we can mark the parser as done recovering.
17694 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17695 break;
17696 }
17697
17698 // If we couldn't parse an expression at all, then we need to
17699 // bail out of the loop.
17700 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17701
17702 // If we successfully parsed a statement, then we are going to
17703 // need terminator to delimit them.
17704 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17705 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17706 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17707 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17708 break;
17709 } else if (!match1(parser, PM_TOKEN_EOF)) {
17710 // If we're at the end of the file, then we're going to add
17711 // an error after this for the ) anyway.
17712 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17713 }
17714 }
17715
17716 context_pop(parser);
17717 pm_accepts_block_stack_pop(parser);
17718 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17719
17720 // When we're parsing multi targets, we allow them to be followed by
17721 // a right parenthesis if they are at the statement level. This is
17722 // only possible if they are the final statement in a parentheses.
17723 // We need to explicitly reject that here.
17724 {
17725 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17726
17727 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17728 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17729 pm_multi_target_node_targets_append(parser, multi_target, statement);
17730
17731 statement = UP(multi_target);
17732 statements->body.nodes[statements->body.size - 1] = statement;
17733 }
17734
17735 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17736 const uint8_t *offset = statement->location.end;
17737 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17738 pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
17739
17740 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17741 statements->body.nodes[statements->body.size - 1] = statement;
17742
17743 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17744 }
17745 }
17746
17747 pop_block_exits(parser, previous_block_exits);
17748 pm_node_list_free(&current_block_exits);
17749
17750 pm_void_statements_check(parser, statements, true);
17751 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17752 }
17753 case PM_TOKEN_BRACE_LEFT: {
17754 // If we were passed a current_hash_keys via the parser, then that
17755 // means we're already parsing a hash and we want to share the set
17756 // of hash keys with this inner hash we're about to parse for the
17757 // sake of warnings. We'll set it to NULL after we grab it to make
17758 // sure subsequent expressions don't use it. Effectively this is a
17759 // way of getting around passing it to every call to
17760 // parse_expression.
17761 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17762 parser->current_hash_keys = NULL;
17763
17764 pm_accepts_block_stack_push(parser, true);
17765 parser_lex(parser);
17766
17767 pm_token_t opening = parser->previous;
17768 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
17769
17770 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17771 if (current_hash_keys != NULL) {
17772 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17773 } else {
17774 pm_static_literals_t hash_keys = { 0 };
17775 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17776 pm_static_literals_free(&hash_keys);
17777 }
17778
17779 accept1(parser, PM_TOKEN_NEWLINE);
17780 }
17781
17782 pm_accepts_block_stack_pop(parser);
17783 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17784 pm_hash_node_closing_loc_set(node, &parser->previous);
17785
17786 return UP(node);
17787 }
17788 case PM_TOKEN_CHARACTER_LITERAL: {
17789 pm_token_t closing = not_provided(parser);
17790 pm_node_t *node = UP(pm_string_node_create_current_string(
17791 parser,
17792 &(pm_token_t) {
17793 .type = PM_TOKEN_STRING_BEGIN,
17794 .start = parser->current.start,
17795 .end = parser->current.start + 1
17796 },
17797 &(pm_token_t) {
17798 .type = PM_TOKEN_STRING_CONTENT,
17799 .start = parser->current.start + 1,
17800 .end = parser->current.end
17801 },
17802 &closing
17803 ));
17804
17805 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17806
17807 // Skip past the character literal here, since now we have handled
17808 // parser->explicit_encoding correctly.
17809 parser_lex(parser);
17810
17811 // Characters can be followed by strings in which case they are
17812 // automatically concatenated.
17813 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17814 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17815 }
17816
17817 return node;
17818 }
17819 case PM_TOKEN_CLASS_VARIABLE: {
17820 parser_lex(parser);
17821 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17822
17823 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17824 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17825 }
17826
17827 return node;
17828 }
17829 case PM_TOKEN_CONSTANT: {
17830 parser_lex(parser);
17831 pm_token_t constant = parser->previous;
17832
17833 // If a constant is immediately followed by parentheses, then this is in
17834 // fact a method call, not a constant read.
17835 if (
17836 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17837 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17838 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17839 match1(parser, PM_TOKEN_BRACE_LEFT)
17840 ) {
17841 pm_arguments_t arguments = { 0 };
17842 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17843 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17844 }
17845
17846 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17847
17848 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17849 // If we get here, then we have a comma immediately following a
17850 // constant, so we're going to parse this as a multiple assignment.
17851 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17852 }
17853
17854 return node;
17855 }
17856 case PM_TOKEN_UCOLON_COLON: {
17857 parser_lex(parser);
17858 pm_token_t delimiter = parser->previous;
17859
17860 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17861 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17862
17863 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17864 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17865 }
17866
17867 return node;
17868 }
17869 case PM_TOKEN_UDOT_DOT:
17870 case PM_TOKEN_UDOT_DOT_DOT: {
17871 pm_token_t operator = parser->current;
17872 parser_lex(parser);
17873
17874 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17875
17876 // Unary .. and ... are special because these are non-associative
17877 // operators that can also be unary operators. In this case we need
17878 // to explicitly reject code that has a .. or ... that follows this
17879 // expression.
17880 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17881 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17882 }
17883
17884 return UP(pm_range_node_create(parser, NULL, &operator, right));
17885 }
17886 case PM_TOKEN_FLOAT:
17887 parser_lex(parser);
17888 return UP(pm_float_node_create(parser, &parser->previous));
17889 case PM_TOKEN_FLOAT_IMAGINARY:
17890 parser_lex(parser);
17891 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17892 case PM_TOKEN_FLOAT_RATIONAL:
17893 parser_lex(parser);
17894 return UP(pm_float_node_rational_create(parser, &parser->previous));
17895 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17896 parser_lex(parser);
17897 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17898 case PM_TOKEN_NUMBERED_REFERENCE: {
17899 parser_lex(parser);
17900 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17901
17902 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17903 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17904 }
17905
17906 return node;
17907 }
17908 case PM_TOKEN_GLOBAL_VARIABLE: {
17909 parser_lex(parser);
17910 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17911
17912 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17913 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17914 }
17915
17916 return node;
17917 }
17918 case PM_TOKEN_BACK_REFERENCE: {
17919 parser_lex(parser);
17920 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17921
17922 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17923 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17924 }
17925
17926 return node;
17927 }
17928 case PM_TOKEN_IDENTIFIER:
17929 case PM_TOKEN_METHOD_NAME: {
17930 parser_lex(parser);
17931 pm_token_t identifier = parser->previous;
17932 pm_node_t *node = parse_variable_call(parser);
17933
17934 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17935 // If parse_variable_call returned with a call node, then we
17936 // know the identifier is not in the local table. In that case
17937 // we need to check if there are arguments following the
17938 // identifier.
17939 pm_call_node_t *call = (pm_call_node_t *) node;
17940 pm_arguments_t arguments = { 0 };
17941
17942 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
17943 // Since we found arguments, we need to turn off the
17944 // variable call bit in the flags.
17945 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17946
17947 call->opening_loc = arguments.opening_loc;
17948 call->arguments = arguments.arguments;
17949 call->closing_loc = arguments.closing_loc;
17950 call->block = arguments.block;
17951
17952 const uint8_t *end = pm_arguments_end(&arguments);
17953 if (!end) {
17954 end = call->message_loc.end;
17955 }
17956 call->base.location.end = end;
17957 }
17958 } else {
17959 // Otherwise, we know the identifier is in the local table. This
17960 // can still be a method call if it is followed by arguments or
17961 // a block, so we need to check for that here.
17962 if (
17963 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17964 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17965 match1(parser, PM_TOKEN_BRACE_LEFT)
17966 ) {
17967 pm_arguments_t arguments = { 0 };
17968 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17969 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17970
17971 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17972 // If we're about to convert an 'it' implicit local
17973 // variable read into a method call, we need to remove
17974 // it from the list of implicit local variables.
17975 pm_node_unreference(parser, node);
17976 } else {
17977 // Otherwise, we're about to convert a regular local
17978 // variable read into a method call, in which case we
17979 // need to indicate that this was not a read for the
17980 // purposes of warnings.
17981 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17982
17983 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17984 pm_node_unreference(parser, node);
17985 } else {
17987 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17988 }
17989 }
17990
17991 pm_node_destroy(parser, node);
17992 return UP(fcall);
17993 }
17994 }
17995
17996 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17997 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17998 }
17999
18000 return node;
18001 }
18002 case PM_TOKEN_HEREDOC_START: {
18003 // Here we have found a heredoc. We'll parse it and add it to the
18004 // list of strings.
18005 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18006 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18007
18008 size_t common_whitespace = (size_t) -1;
18009 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18010
18011 parser_lex(parser);
18012 pm_token_t opening = parser->previous;
18013
18014 pm_node_t *node;
18015 pm_node_t *part;
18016
18017 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18018 // If we get here, then we have an empty heredoc. We'll create
18019 // an empty content token and return an empty string node.
18020 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18021 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18022
18023 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18024 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18025 } else {
18026 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18027 }
18028
18029 node->location.end = opening.end;
18030 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18031 // If we get here, then we tried to find something in the
18032 // heredoc but couldn't actually parse anything, so we'll just
18033 // return a missing node.
18034 //
18035 // parse_string_part handles its own errors, so there is no need
18036 // for us to add one here.
18037 node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18038 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18039 // If we get here, then the part that we parsed was plain string
18040 // content and we're at the end of the heredoc, so we can return
18041 // just a string node with the heredoc opening and closing as
18042 // its opening and closing.
18043 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18044 pm_string_node_t *cast = (pm_string_node_t *) part;
18045
18046 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18047 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18048 cast->base.location = cast->opening_loc;
18049
18050 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18051 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18052 cast->base.type = PM_X_STRING_NODE;
18053 }
18054
18055 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18056 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18057 }
18058
18059 node = UP(cast);
18060 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18061 } else {
18062 // If we get here, then we have multiple parts in the heredoc,
18063 // so we'll need to create an interpolated string node to hold
18064 // them all.
18065 pm_node_list_t parts = { 0 };
18066 pm_node_list_append(&parts, part);
18067
18068 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18069 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18070 pm_node_list_append(&parts, part);
18071 }
18072 }
18073
18074 // Now that we have all of the parts, create the correct type of
18075 // interpolated node.
18076 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18077 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18078 cast->parts = parts;
18079
18080 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18081 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18082
18083 cast->base.location = cast->opening_loc;
18084 node = UP(cast);
18085 } else {
18086 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18087 pm_node_list_free(&parts);
18088
18089 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18090 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18091
18092 cast->base.location = cast->opening_loc;
18093 node = UP(cast);
18094 }
18095
18096 // If this is a heredoc that is indented with a ~, then we need
18097 // to dedent each line by the common leading whitespace.
18098 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18099 pm_node_list_t *nodes;
18100 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18101 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18102 } else {
18103 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18104 }
18105
18106 parse_heredoc_dedent(parser, nodes, common_whitespace);
18107 }
18108 }
18109
18110 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18111 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18112 }
18113
18114 return node;
18115 }
18116 case PM_TOKEN_INSTANCE_VARIABLE: {
18117 parser_lex(parser);
18118 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18119
18120 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18121 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18122 }
18123
18124 return node;
18125 }
18126 case PM_TOKEN_INTEGER: {
18127 pm_node_flags_t base = parser->integer_base;
18128 parser_lex(parser);
18129 return UP(pm_integer_node_create(parser, base, &parser->previous));
18130 }
18131 case PM_TOKEN_INTEGER_IMAGINARY: {
18132 pm_node_flags_t base = parser->integer_base;
18133 parser_lex(parser);
18134 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18135 }
18136 case PM_TOKEN_INTEGER_RATIONAL: {
18137 pm_node_flags_t base = parser->integer_base;
18138 parser_lex(parser);
18139 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18140 }
18141 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18142 pm_node_flags_t base = parser->integer_base;
18143 parser_lex(parser);
18144 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18145 }
18146 case PM_TOKEN_KEYWORD___ENCODING__:
18147 parser_lex(parser);
18148 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18149 case PM_TOKEN_KEYWORD___FILE__:
18150 parser_lex(parser);
18151 return UP(pm_source_file_node_create(parser, &parser->previous));
18152 case PM_TOKEN_KEYWORD___LINE__:
18153 parser_lex(parser);
18154 return UP(pm_source_line_node_create(parser, &parser->previous));
18155 case PM_TOKEN_KEYWORD_ALIAS: {
18156 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18157 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18158 }
18159
18160 parser_lex(parser);
18161 pm_token_t keyword = parser->previous;
18162
18163 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18164 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18165
18166 switch (PM_NODE_TYPE(new_name)) {
18167 case PM_BACK_REFERENCE_READ_NODE:
18168 case PM_NUMBERED_REFERENCE_READ_NODE:
18169 case PM_GLOBAL_VARIABLE_READ_NODE: {
18170 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18171 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18172 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18173 }
18174 } else {
18175 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18176 }
18177
18178 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18179 }
18180 case PM_SYMBOL_NODE:
18181 case PM_INTERPOLATED_SYMBOL_NODE: {
18182 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18183 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18184 }
18185 }
18187 default:
18188 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18189 }
18190 }
18191 case PM_TOKEN_KEYWORD_CASE: {
18192 size_t opening_newline_index = token_newline_index(parser);
18193 parser_lex(parser);
18194
18195 pm_token_t case_keyword = parser->previous;
18196 pm_node_t *predicate = NULL;
18197
18198 pm_node_list_t current_block_exits = { 0 };
18199 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18200
18201 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18202 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18203 predicate = NULL;
18204 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18205 predicate = NULL;
18206 } else if (!token_begins_expression_p(parser->current.type)) {
18207 predicate = NULL;
18208 } else {
18209 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18210 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18211 }
18212
18213 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18214 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18215 parser_lex(parser);
18216
18217 pop_block_exits(parser, previous_block_exits);
18218 pm_node_list_free(&current_block_exits);
18219
18220 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18221 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18222 }
18223
18224 // At this point we can create a case node, though we don't yet know
18225 // if it is a case-in or case-when node.
18226 pm_token_t end_keyword = not_provided(parser);
18227 pm_node_t *node;
18228
18229 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18230 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18231 pm_static_literals_t literals = { 0 };
18232
18233 // At this point we've seen a when keyword, so we know this is a
18234 // case-when node. We will continue to parse the when nodes
18235 // until we hit the end of the list.
18236 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18237 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18238 parser_lex(parser);
18239
18240 pm_token_t when_keyword = parser->previous;
18241 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18242
18243 do {
18244 if (accept1(parser, PM_TOKEN_USTAR)) {
18245 pm_token_t operator = parser->previous;
18246 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18247
18248 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18249 pm_when_node_conditions_append(when_node, UP(splat_node));
18250
18251 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18252 } else {
18253 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18254 pm_when_node_conditions_append(when_node, condition);
18255
18256 // If we found a missing node, then this is a syntax
18257 // error and we should stop looping.
18258 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18259
18260 // If this is a string node, then we need to mark it
18261 // as frozen because when clause strings are frozen.
18262 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18263 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18264 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18265 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18266 }
18267
18268 pm_when_clause_static_literals_add(parser, &literals, condition);
18269 }
18270 } while (accept1(parser, PM_TOKEN_COMMA));
18271
18272 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18273 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18274 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18275 }
18276 } else {
18277 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18278 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18279 }
18280
18281 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18282 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18283 if (statements != NULL) {
18284 pm_when_node_statements_set(when_node, statements);
18285 }
18286 }
18287
18288 pm_case_node_condition_append(case_node, UP(when_node));
18289 }
18290
18291 // If we didn't parse any conditions (in or when) then we need
18292 // to indicate that we have an error.
18293 if (case_node->conditions.size == 0) {
18294 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18295 }
18296
18297 pm_static_literals_free(&literals);
18298 node = UP(case_node);
18299 } else {
18300 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18301
18302 // If this is a case-match node (i.e., it is a pattern matching
18303 // case statement) then we must have a predicate.
18304 if (predicate == NULL) {
18305 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18306 }
18307
18308 // At this point we expect that we're parsing a case-in node. We
18309 // will continue to parse the in nodes until we hit the end of
18310 // the list.
18311 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18312 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18313
18314 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18315 parser->pattern_matching_newlines = true;
18316
18317 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18318 parser->command_start = false;
18319 parser_lex(parser);
18320
18321 pm_token_t in_keyword = parser->previous;
18322
18323 pm_constant_id_list_t captures = { 0 };
18324 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18325
18326 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18327 pm_constant_id_list_free(&captures);
18328
18329 // Since we're in the top-level of the case-in node we need
18330 // to check for guard clauses in the form of `if` or
18331 // `unless` statements.
18332 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18333 pm_token_t keyword = parser->previous;
18334 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18335 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18336 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18337 pm_token_t keyword = parser->previous;
18338 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18339 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18340 }
18341
18342 // Now we need to check for the terminator of the in node's
18343 // pattern. It can be a newline or semicolon optionally
18344 // followed by a `then` keyword.
18345 pm_token_t then_keyword;
18346 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18347 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18348 then_keyword = parser->previous;
18349 } else {
18350 then_keyword = not_provided(parser);
18351 }
18352 } else {
18353 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18354 then_keyword = parser->previous;
18355 }
18356
18357 // Now we can actually parse the statements associated with
18358 // the in node.
18359 pm_statements_node_t *statements;
18360 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18361 statements = NULL;
18362 } else {
18363 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18364 }
18365
18366 // Now that we have the full pattern and statements, we can
18367 // create the node and attach it to the case node.
18368 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
18369 pm_case_match_node_condition_append(case_node, condition);
18370 }
18371
18372 // If we didn't parse any conditions (in or when) then we need
18373 // to indicate that we have an error.
18374 if (case_node->conditions.size == 0) {
18375 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18376 }
18377
18378 node = UP(case_node);
18379 }
18380
18381 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18382 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18383 pm_token_t else_keyword = parser->previous;
18384 pm_else_node_t *else_node;
18385
18386 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18387 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18388 } else {
18389 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18390 }
18391
18392 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18393 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18394 } else {
18395 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18396 }
18397 }
18398
18399 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18400 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18401
18402 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18403 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18404 } else {
18405 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18406 }
18407
18408 pop_block_exits(parser, previous_block_exits);
18409 pm_node_list_free(&current_block_exits);
18410
18411 return node;
18412 }
18413 case PM_TOKEN_KEYWORD_BEGIN: {
18414 size_t opening_newline_index = token_newline_index(parser);
18415 parser_lex(parser);
18416
18417 pm_token_t begin_keyword = parser->previous;
18418 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18419
18420 pm_node_list_t current_block_exits = { 0 };
18421 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18422 pm_statements_node_t *begin_statements = NULL;
18423
18424 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18425 pm_accepts_block_stack_push(parser, true);
18426 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18427 pm_accepts_block_stack_pop(parser);
18428 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18429 }
18430
18431 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18432 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18433 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18434
18435 begin_node->base.location.end = parser->previous.end;
18436 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18437
18438 pop_block_exits(parser, previous_block_exits);
18439 pm_node_list_free(&current_block_exits);
18440
18441 return UP(begin_node);
18442 }
18443 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18444 pm_node_list_t current_block_exits = { 0 };
18445 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18446
18447 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18448 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18449 }
18450
18451 parser_lex(parser);
18452 pm_token_t keyword = parser->previous;
18453
18454 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18455 pm_token_t opening = parser->previous;
18456 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18457
18458 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
18459 pm_context_t context = parser->current_context->context;
18460 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18461 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18462 }
18463
18464 flush_block_exits(parser, previous_block_exits);
18465 pm_node_list_free(&current_block_exits);
18466
18467 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18468 }
18469 case PM_TOKEN_KEYWORD_BREAK:
18470 case PM_TOKEN_KEYWORD_NEXT:
18471 case PM_TOKEN_KEYWORD_RETURN: {
18472 parser_lex(parser);
18473
18474 pm_token_t keyword = parser->previous;
18475 pm_arguments_t arguments = { 0 };
18476
18477 if (
18478 token_begins_expression_p(parser->current.type) ||
18479 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18480 ) {
18481 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18482
18483 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18484 pm_token_t next = parser->current;
18485 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18486
18487 // Reject `foo && return bar`.
18488 if (!accepts_command_call && arguments.arguments != NULL) {
18489 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18490 }
18491 }
18492 }
18493
18494 switch (keyword.type) {
18495 case PM_TOKEN_KEYWORD_BREAK: {
18496 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18497 if (!parser->partial_script) parse_block_exit(parser, node);
18498 return node;
18499 }
18500 case PM_TOKEN_KEYWORD_NEXT: {
18501 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18502 if (!parser->partial_script) parse_block_exit(parser, node);
18503 return node;
18504 }
18505 case PM_TOKEN_KEYWORD_RETURN: {
18506 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18507 parse_return(parser, node);
18508 return node;
18509 }
18510 default:
18511 assert(false && "unreachable");
18512 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18513 }
18514 }
18515 case PM_TOKEN_KEYWORD_SUPER: {
18516 parser_lex(parser);
18517
18518 pm_token_t keyword = parser->previous;
18519 pm_arguments_t arguments = { 0 };
18520 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18521
18522 if (
18523 arguments.opening_loc.start == NULL &&
18524 arguments.arguments == NULL &&
18525 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18526 ) {
18527 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18528 }
18529
18530 return UP(pm_super_node_create(parser, &keyword, &arguments));
18531 }
18532 case PM_TOKEN_KEYWORD_YIELD: {
18533 parser_lex(parser);
18534
18535 pm_token_t keyword = parser->previous;
18536 pm_arguments_t arguments = { 0 };
18537 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18538
18539 // It's possible that we've parsed a block argument through our
18540 // call to parse_arguments_list. If we found one, we should mark it
18541 // as invalid and destroy it, as we don't have a place for it on the
18542 // yield node.
18543 if (arguments.block != NULL) {
18544 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18545 pm_node_unreference(parser, arguments.block);
18546 pm_node_destroy(parser, arguments.block);
18547 arguments.block = NULL;
18548 }
18549
18550 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18551 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18552
18553 return node;
18554 }
18555 case PM_TOKEN_KEYWORD_CLASS: {
18556 size_t opening_newline_index = token_newline_index(parser);
18557 parser_lex(parser);
18558
18559 pm_token_t class_keyword = parser->previous;
18560 pm_do_loop_stack_push(parser, false);
18561
18562 pm_node_list_t current_block_exits = { 0 };
18563 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18564
18565 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18566 pm_token_t operator = parser->previous;
18567 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18568
18569 pm_parser_scope_push(parser, true);
18570 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18571 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18572 }
18573
18574 pm_node_t *statements = NULL;
18575 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18576 pm_accepts_block_stack_push(parser, true);
18577 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18578 pm_accepts_block_stack_pop(parser);
18579 }
18580
18581 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18582 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18583 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18584 } else {
18585 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18586 }
18587
18588 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18589
18590 pm_constant_id_list_t locals;
18591 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18592
18593 pm_parser_scope_pop(parser);
18594 pm_do_loop_stack_pop(parser);
18595
18596 flush_block_exits(parser, previous_block_exits);
18597 pm_node_list_free(&current_block_exits);
18598
18599 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18600 }
18601
18602 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18603 pm_token_t name = parser->previous;
18604 if (name.type != PM_TOKEN_CONSTANT) {
18605 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18606 }
18607
18608 pm_token_t inheritance_operator;
18609 pm_node_t *superclass;
18610
18611 if (match1(parser, PM_TOKEN_LESS)) {
18612 inheritance_operator = parser->current;
18613 lex_state_set(parser, PM_LEX_STATE_BEG);
18614
18615 parser->command_start = true;
18616 parser_lex(parser);
18617
18618 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18619 } else {
18620 inheritance_operator = not_provided(parser);
18621 superclass = NULL;
18622 }
18623
18624 pm_parser_scope_push(parser, true);
18625
18626 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
18627 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18628 } else {
18629 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18630 }
18631 pm_node_t *statements = NULL;
18632
18633 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18634 pm_accepts_block_stack_push(parser, true);
18635 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18636 pm_accepts_block_stack_pop(parser);
18637 }
18638
18639 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18640 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18641 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18642 } else {
18643 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18644 }
18645
18646 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18647
18648 if (context_def_p(parser)) {
18649 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18650 }
18651
18652 pm_constant_id_list_t locals;
18653 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18654
18655 pm_parser_scope_pop(parser);
18656 pm_do_loop_stack_pop(parser);
18657
18658 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18659 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18660 }
18661
18662 pop_block_exits(parser, previous_block_exits);
18663 pm_node_list_free(&current_block_exits);
18664
18665 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
18666 }
18667 case PM_TOKEN_KEYWORD_DEF: {
18668 pm_node_list_t current_block_exits = { 0 };
18669 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18670
18671 pm_token_t def_keyword = parser->current;
18672 size_t opening_newline_index = token_newline_index(parser);
18673
18674 pm_node_t *receiver = NULL;
18675 pm_token_t operator = not_provided(parser);
18676 pm_token_t name;
18677
18678 // This context is necessary for lexing `...` in bare params
18679 // correctly. It must be pushed before lexing the first parameter, so
18680 // it is pushed here.
18681 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18682 parser_lex(parser);
18683
18684 // This will be false if the method name is not a valid identifier
18685 // but could be followed by an operator.
18686 bool valid_name = true;
18687
18688 switch (parser->current.type) {
18689 case PM_CASE_OPERATOR:
18690 pm_parser_scope_push(parser, true);
18691 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18692 parser_lex(parser);
18693
18694 name = parser->previous;
18695 break;
18696 case PM_TOKEN_IDENTIFIER: {
18697 parser_lex(parser);
18698
18699 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18700 receiver = parse_variable_call(parser);
18701
18702 pm_parser_scope_push(parser, true);
18703 lex_state_set(parser, PM_LEX_STATE_FNAME);
18704 parser_lex(parser);
18705
18706 operator = parser->previous;
18707 name = parse_method_definition_name(parser);
18708 } else {
18709 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
18710 pm_parser_scope_push(parser, true);
18711
18712 name = parser->previous;
18713 }
18714
18715 break;
18716 }
18717 case PM_TOKEN_INSTANCE_VARIABLE:
18718 case PM_TOKEN_CLASS_VARIABLE:
18719 case PM_TOKEN_GLOBAL_VARIABLE:
18720 valid_name = false;
18722 case PM_TOKEN_CONSTANT:
18723 case PM_TOKEN_KEYWORD_NIL:
18724 case PM_TOKEN_KEYWORD_SELF:
18725 case PM_TOKEN_KEYWORD_TRUE:
18726 case PM_TOKEN_KEYWORD_FALSE:
18727 case PM_TOKEN_KEYWORD___FILE__:
18728 case PM_TOKEN_KEYWORD___LINE__:
18729 case PM_TOKEN_KEYWORD___ENCODING__: {
18730 pm_parser_scope_push(parser, true);
18731 parser_lex(parser);
18732
18733 pm_token_t identifier = parser->previous;
18734
18735 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18736 lex_state_set(parser, PM_LEX_STATE_FNAME);
18737 parser_lex(parser);
18738 operator = parser->previous;
18739
18740 switch (identifier.type) {
18741 case PM_TOKEN_CONSTANT:
18742 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18743 break;
18744 case PM_TOKEN_INSTANCE_VARIABLE:
18745 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18746 break;
18747 case PM_TOKEN_CLASS_VARIABLE:
18748 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18749 break;
18750 case PM_TOKEN_GLOBAL_VARIABLE:
18751 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18752 break;
18753 case PM_TOKEN_KEYWORD_NIL:
18754 receiver = UP(pm_nil_node_create(parser, &identifier));
18755 break;
18756 case PM_TOKEN_KEYWORD_SELF:
18757 receiver = UP(pm_self_node_create(parser, &identifier));
18758 break;
18759 case PM_TOKEN_KEYWORD_TRUE:
18760 receiver = UP(pm_true_node_create(parser, &identifier));
18761 break;
18762 case PM_TOKEN_KEYWORD_FALSE:
18763 receiver = UP(pm_false_node_create(parser, &identifier));
18764 break;
18765 case PM_TOKEN_KEYWORD___FILE__:
18766 receiver = UP(pm_source_file_node_create(parser, &identifier));
18767 break;
18768 case PM_TOKEN_KEYWORD___LINE__:
18769 receiver = UP(pm_source_line_node_create(parser, &identifier));
18770 break;
18771 case PM_TOKEN_KEYWORD___ENCODING__:
18772 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18773 break;
18774 default:
18775 break;
18776 }
18777
18778 name = parse_method_definition_name(parser);
18779 } else {
18780 if (!valid_name) {
18781 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18782 }
18783
18784 name = identifier;
18785 }
18786 break;
18787 }
18788 case PM_TOKEN_PARENTHESIS_LEFT: {
18789 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18790 // the inner expression of this parenthesis should not be
18791 // processed under this context. Thus, the context is popped
18792 // here.
18793 context_pop(parser);
18794 parser_lex(parser);
18795
18796 pm_token_t lparen = parser->previous;
18797 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18798
18799 accept1(parser, PM_TOKEN_NEWLINE);
18800 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18801 pm_token_t rparen = parser->previous;
18802
18803 lex_state_set(parser, PM_LEX_STATE_FNAME);
18804 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18805
18806 operator = parser->previous;
18807 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18808
18809 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
18810 // reason as described above.
18811 pm_parser_scope_push(parser, true);
18812 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18813 name = parse_method_definition_name(parser);
18814 break;
18815 }
18816 default:
18817 pm_parser_scope_push(parser, true);
18818 name = parse_method_definition_name(parser);
18819 break;
18820 }
18821
18822 pm_token_t lparen;
18823 pm_token_t rparen;
18824 pm_parameters_node_t *params;
18825
18826 bool accept_endless_def = true;
18827 switch (parser->current.type) {
18828 case PM_TOKEN_PARENTHESIS_LEFT: {
18829 parser_lex(parser);
18830 lparen = parser->previous;
18831
18832 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18833 params = NULL;
18834 } else {
18835 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
18836 }
18837
18838 lex_state_set(parser, PM_LEX_STATE_BEG);
18839 parser->command_start = true;
18840
18841 context_pop(parser);
18842 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18843 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18844 parser->previous.start = parser->previous.end;
18845 parser->previous.type = PM_TOKEN_MISSING;
18846 }
18847
18848 rparen = parser->previous;
18849 break;
18850 }
18851 case PM_CASE_PARAMETER: {
18852 // If we're about to lex a label, we need to add the label
18853 // state to make sure the next newline is ignored.
18854 if (parser->current.type == PM_TOKEN_LABEL) {
18855 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18856 }
18857
18858 lparen = not_provided(parser);
18859 rparen = not_provided(parser);
18860 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
18861
18862 // Reject `def * = 1` and similar. We have to specifically check
18863 // for them because they create ambiguity with optional arguments.
18864 accept_endless_def = false;
18865
18866 context_pop(parser);
18867 break;
18868 }
18869 default: {
18870 lparen = not_provided(parser);
18871 rparen = not_provided(parser);
18872 params = NULL;
18873
18874 context_pop(parser);
18875 break;
18876 }
18877 }
18878
18879 pm_node_t *statements = NULL;
18880 pm_token_t equal;
18881 pm_token_t end_keyword;
18882
18883 if (accept1(parser, PM_TOKEN_EQUAL)) {
18884 if (token_is_setter_name(&name)) {
18885 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18886 }
18887 if (!accept_endless_def) {
18888 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18889 }
18890 if (
18893 ) {
18894 PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18895 }
18896 equal = parser->previous;
18897
18898 context_push(parser, PM_CONTEXT_DEF);
18899 pm_do_loop_stack_push(parser, false);
18900 statements = UP(pm_statements_node_create(parser));
18901
18902 bool allow_command_call;
18903 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18904 allow_command_call = accepts_command_call;
18905 } else {
18906 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18907 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18908 }
18909
18910 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18911
18912 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18913 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18914
18915 pm_token_t rescue_keyword = parser->previous;
18916 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18917 context_pop(parser);
18918
18919 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18920 }
18921
18922 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18923 pm_do_loop_stack_pop(parser);
18924 context_pop(parser);
18925 end_keyword = not_provided(parser);
18926 } else {
18927 equal = not_provided(parser);
18928
18929 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
18930 lex_state_set(parser, PM_LEX_STATE_BEG);
18931 parser->command_start = true;
18932 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18933 } else {
18934 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18935 }
18936
18937 pm_accepts_block_stack_push(parser, true);
18938 pm_do_loop_stack_push(parser, false);
18939
18940 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18941 pm_accepts_block_stack_push(parser, true);
18942 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18943 pm_accepts_block_stack_pop(parser);
18944 }
18945
18946 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18947 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18948 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18949 } else {
18950 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18951 }
18952
18953 pm_accepts_block_stack_pop(parser);
18954 pm_do_loop_stack_pop(parser);
18955
18956 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18957 end_keyword = parser->previous;
18958 }
18959
18960 pm_constant_id_list_t locals;
18961 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18962 pm_parser_scope_pop(parser);
18963
18969 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
18970
18971 flush_block_exits(parser, previous_block_exits);
18972 pm_node_list_free(&current_block_exits);
18973
18974 return UP(pm_def_node_create(
18975 parser,
18976 name_id,
18977 &name,
18978 receiver,
18979 params,
18980 statements,
18981 &locals,
18982 &def_keyword,
18983 &operator,
18984 &lparen,
18985 &rparen,
18986 &equal,
18987 &end_keyword
18988 ));
18989 }
18990 case PM_TOKEN_KEYWORD_DEFINED: {
18991 parser_lex(parser);
18992 pm_token_t keyword = parser->previous;
18993
18994 pm_token_t lparen;
18995 pm_token_t rparen;
18996 pm_node_t *expression;
18997
18998 context_push(parser, PM_CONTEXT_DEFINED);
18999 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19000
19001 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19002 lparen = parser->previous;
19003
19004 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19005 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19006 lparen = not_provided(parser);
19007 rparen = not_provided(parser);
19008 } else {
19009 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19010
19011 if (parser->recovering) {
19012 rparen = not_provided(parser);
19013 } else {
19014 accept1(parser, PM_TOKEN_NEWLINE);
19015 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19016 rparen = parser->previous;
19017 }
19018 }
19019 } else {
19020 lparen = not_provided(parser);
19021 rparen = not_provided(parser);
19022 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19023 }
19024
19025 context_pop(parser);
19026 return UP(pm_defined_node_create(
19027 parser,
19028 &lparen,
19029 expression,
19030 &rparen,
19031 &keyword
19032 ));
19033 }
19034 case PM_TOKEN_KEYWORD_END_UPCASE: {
19035 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19036 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19037 }
19038
19039 parser_lex(parser);
19040 pm_token_t keyword = parser->previous;
19041
19042 if (context_def_p(parser)) {
19043 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19044 }
19045
19046 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19047 pm_token_t opening = parser->previous;
19048 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19049
19050 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19051 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19052 }
19053 case PM_TOKEN_KEYWORD_FALSE:
19054 parser_lex(parser);
19055 return UP(pm_false_node_create(parser, &parser->previous));
19056 case PM_TOKEN_KEYWORD_FOR: {
19057 size_t opening_newline_index = token_newline_index(parser);
19058 parser_lex(parser);
19059
19060 pm_token_t for_keyword = parser->previous;
19061 pm_node_t *index;
19062
19063 context_push(parser, PM_CONTEXT_FOR_INDEX);
19064
19065 // First, parse out the first index expression.
19066 if (accept1(parser, PM_TOKEN_USTAR)) {
19067 pm_token_t star_operator = parser->previous;
19068 pm_node_t *name = NULL;
19069
19070 if (token_begins_expression_p(parser->current.type)) {
19071 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19072 }
19073
19074 index = UP(pm_splat_node_create(parser, &star_operator, name));
19075 } else if (token_begins_expression_p(parser->current.type)) {
19076 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19077 } else {
19078 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19079 index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
19080 }
19081
19082 // Now, if there are multiple index expressions, parse them out.
19083 if (match1(parser, PM_TOKEN_COMMA)) {
19084 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19085 } else {
19086 index = parse_target(parser, index, false, false);
19087 }
19088
19089 context_pop(parser);
19090 pm_do_loop_stack_push(parser, true);
19091
19092 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19093 pm_token_t in_keyword = parser->previous;
19094
19095 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19096 pm_do_loop_stack_pop(parser);
19097
19098 pm_token_t do_keyword;
19099 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19100 do_keyword = parser->previous;
19101 } else {
19102 do_keyword = not_provided(parser);
19103 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19104 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19105 }
19106 }
19107
19108 pm_statements_node_t *statements = NULL;
19109 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19110 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19111 }
19112
19113 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19114 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19115
19116 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
19117 }
19118 case PM_TOKEN_KEYWORD_IF:
19119 if (parser_end_of_line_p(parser)) {
19120 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19121 }
19122
19123 size_t opening_newline_index = token_newline_index(parser);
19124 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19125 parser_lex(parser);
19126
19127 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19128 case PM_TOKEN_KEYWORD_UNDEF: {
19129 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19130 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19131 }
19132
19133 parser_lex(parser);
19134 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19135 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19136
19137 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19138 pm_node_destroy(parser, name);
19139 } else {
19140 pm_undef_node_append(undef, name);
19141
19142 while (match1(parser, PM_TOKEN_COMMA)) {
19143 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19144 parser_lex(parser);
19145 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19146
19147 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19148 pm_node_destroy(parser, name);
19149 break;
19150 }
19151
19152 pm_undef_node_append(undef, name);
19153 }
19154 }
19155
19156 return UP(undef);
19157 }
19158 case PM_TOKEN_KEYWORD_NOT: {
19159 parser_lex(parser);
19160
19161 pm_token_t message = parser->previous;
19162 pm_arguments_t arguments = { 0 };
19163 pm_node_t *receiver = NULL;
19164
19165 // If we do not accept a command call, then we also do not accept a
19166 // not without parentheses. In this case we need to reject this
19167 // syntax.
19168 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19169 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19170 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19171 } else {
19172 accept1(parser, PM_TOKEN_NEWLINE);
19173 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19174 }
19175
19176 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
19177 }
19178
19179 accept1(parser, PM_TOKEN_NEWLINE);
19180
19181 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19182 pm_token_t lparen = parser->previous;
19183
19184 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19185 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19186 } else {
19187 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19188 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19189
19190 if (!parser->recovering) {
19191 accept1(parser, PM_TOKEN_NEWLINE);
19192 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19193 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19194 }
19195 }
19196 } else {
19197 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19198 }
19199
19200 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19201 }
19202 case PM_TOKEN_KEYWORD_UNLESS: {
19203 size_t opening_newline_index = token_newline_index(parser);
19204 parser_lex(parser);
19205
19206 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19207 }
19208 case PM_TOKEN_KEYWORD_MODULE: {
19209 pm_node_list_t current_block_exits = { 0 };
19210 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19211
19212 size_t opening_newline_index = token_newline_index(parser);
19213 parser_lex(parser);
19214 pm_token_t module_keyword = parser->previous;
19215
19216 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19217 pm_token_t name;
19218
19219 // If we can recover from a syntax error that occurred while parsing
19220 // the name of the module, then we'll handle that here.
19221 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19222 pop_block_exits(parser, previous_block_exits);
19223 pm_node_list_free(&current_block_exits);
19224
19225 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19226 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19227 }
19228
19229 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19230 pm_token_t double_colon = parser->previous;
19231
19232 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19233 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19234 }
19235
19236 // Here we retrieve the name of the module. If it wasn't a constant,
19237 // then it's possible that `module foo` was passed, which is a
19238 // syntax error. We handle that here as well.
19239 name = parser->previous;
19240 if (name.type != PM_TOKEN_CONSTANT) {
19241 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19242 }
19243
19244 pm_parser_scope_push(parser, true);
19245 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19246 pm_node_t *statements = NULL;
19247
19248 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19249 pm_accepts_block_stack_push(parser, true);
19250 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19251 pm_accepts_block_stack_pop(parser);
19252 }
19253
19254 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19255 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19256 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19257 } else {
19258 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19259 }
19260
19261 pm_constant_id_list_t locals;
19262 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19263
19264 pm_parser_scope_pop(parser);
19265 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19266
19267 if (context_def_p(parser)) {
19268 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19269 }
19270
19271 pop_block_exits(parser, previous_block_exits);
19272 pm_node_list_free(&current_block_exits);
19273
19274 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19275 }
19276 case PM_TOKEN_KEYWORD_NIL:
19277 parser_lex(parser);
19278 return UP(pm_nil_node_create(parser, &parser->previous));
19279 case PM_TOKEN_KEYWORD_REDO: {
19280 parser_lex(parser);
19281
19282 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19283 if (!parser->partial_script) parse_block_exit(parser, node);
19284
19285 return node;
19286 }
19287 case PM_TOKEN_KEYWORD_RETRY: {
19288 parser_lex(parser);
19289
19290 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19291 parse_retry(parser, node);
19292
19293 return node;
19294 }
19295 case PM_TOKEN_KEYWORD_SELF:
19296 parser_lex(parser);
19297 return UP(pm_self_node_create(parser, &parser->previous));
19298 case PM_TOKEN_KEYWORD_TRUE:
19299 parser_lex(parser);
19300 return UP(pm_true_node_create(parser, &parser->previous));
19301 case PM_TOKEN_KEYWORD_UNTIL: {
19302 size_t opening_newline_index = token_newline_index(parser);
19303
19304 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19305 pm_do_loop_stack_push(parser, true);
19306
19307 parser_lex(parser);
19308 pm_token_t keyword = parser->previous;
19309 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19310
19311 pm_do_loop_stack_pop(parser);
19312 context_pop(parser);
19313
19314 pm_token_t do_keyword;
19315 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19316 do_keyword = parser->previous;
19317 } else {
19318 do_keyword = not_provided(parser);
19319 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19320 }
19321
19322 pm_statements_node_t *statements = NULL;
19323 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19324 pm_accepts_block_stack_push(parser, true);
19325 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19326 pm_accepts_block_stack_pop(parser);
19327 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19328 }
19329
19330 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19331 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19332
19333 return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19334 }
19335 case PM_TOKEN_KEYWORD_WHILE: {
19336 size_t opening_newline_index = token_newline_index(parser);
19337
19338 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19339 pm_do_loop_stack_push(parser, true);
19340
19341 parser_lex(parser);
19342 pm_token_t keyword = parser->previous;
19343 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19344
19345 pm_do_loop_stack_pop(parser);
19346 context_pop(parser);
19347
19348 pm_token_t do_keyword;
19349 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19350 do_keyword = parser->previous;
19351 } else {
19352 do_keyword = not_provided(parser);
19353 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19354 }
19355
19356 pm_statements_node_t *statements = NULL;
19357 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19358 pm_accepts_block_stack_push(parser, true);
19359 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19360 pm_accepts_block_stack_pop(parser);
19361 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19362 }
19363
19364 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19365 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19366
19367 return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19368 }
19369 case PM_TOKEN_PERCENT_LOWER_I: {
19370 parser_lex(parser);
19371 pm_token_t opening = parser->previous;
19372 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19373 pm_node_t *current = NULL;
19374
19375 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19376 accept1(parser, PM_TOKEN_WORDS_SEP);
19377 if (match1(parser, PM_TOKEN_STRING_END)) break;
19378
19379 // Interpolation is not possible but nested heredocs can still lead to
19380 // consecutive (disjoint) string tokens when the final newline is escaped.
19381 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19382 pm_token_t opening = not_provided(parser);
19383 pm_token_t closing = not_provided(parser);
19384
19385 // Record the string node, moving to interpolation if needed.
19386 if (current == NULL) {
19387 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19388 parser_lex(parser);
19389 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19390 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19391 parser_lex(parser);
19392 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19393 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19394 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19395 pm_token_t bounds = not_provided(parser);
19396
19397 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19398 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19399 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19400 parser_lex(parser);
19401
19402 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19403 pm_interpolated_symbol_node_append(interpolated, first_string);
19404 pm_interpolated_symbol_node_append(interpolated, second_string);
19405
19406 xfree(current);
19407 current = UP(interpolated);
19408 } else {
19409 assert(false && "unreachable");
19410 }
19411 }
19412
19413 if (current) {
19414 pm_array_node_elements_append(array, current);
19415 current = NULL;
19416 } else {
19417 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19418 }
19419 }
19420
19421 pm_token_t closing = parser->current;
19422 if (match1(parser, PM_TOKEN_EOF)) {
19423 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19424 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19425 } else {
19426 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19427 }
19428 pm_array_node_close_set(array, &closing);
19429
19430 return UP(array);
19431 }
19432 case PM_TOKEN_PERCENT_UPPER_I: {
19433 parser_lex(parser);
19434 pm_token_t opening = parser->previous;
19435 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19436
19437 // This is the current node that we are parsing that will be added to the
19438 // list of elements.
19439 pm_node_t *current = NULL;
19440
19441 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19442 switch (parser->current.type) {
19443 case PM_TOKEN_WORDS_SEP: {
19444 if (current == NULL) {
19445 // If we hit a separator before we have any content, then we don't
19446 // need to do anything.
19447 } else {
19448 // If we hit a separator after we've hit content, then we need to
19449 // append that content to the list and reset the current node.
19450 pm_array_node_elements_append(array, current);
19451 current = NULL;
19452 }
19453
19454 parser_lex(parser);
19455 break;
19456 }
19457 case PM_TOKEN_STRING_CONTENT: {
19458 pm_token_t opening = not_provided(parser);
19459 pm_token_t closing = not_provided(parser);
19460
19461 if (current == NULL) {
19462 // If we hit content and the current node is NULL, then this is
19463 // the first string content we've seen. In that case we're going
19464 // to create a new string node and set that to the current.
19465 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19466 parser_lex(parser);
19467 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19468 // If we hit string content and the current node is an
19469 // interpolated string, then we need to append the string content
19470 // to the list of child nodes.
19471 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19472 parser_lex(parser);
19473
19474 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19475 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19476 // If we hit string content and the current node is a symbol node,
19477 // then we need to convert the current node into an interpolated
19478 // string and add the string content to the list of child nodes.
19479 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19480 pm_token_t bounds = not_provided(parser);
19481
19482 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19483 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19484 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19485 parser_lex(parser);
19486
19487 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19488 pm_interpolated_symbol_node_append(interpolated, first_string);
19489 pm_interpolated_symbol_node_append(interpolated, second_string);
19490
19491 xfree(current);
19492 current = UP(interpolated);
19493 } else {
19494 assert(false && "unreachable");
19495 }
19496
19497 break;
19498 }
19499 case PM_TOKEN_EMBVAR: {
19500 bool start_location_set = false;
19501 if (current == NULL) {
19502 // If we hit an embedded variable and the current node is NULL,
19503 // then this is the start of a new string. We'll set the current
19504 // node to a new interpolated string.
19505 pm_token_t opening = not_provided(parser);
19506 pm_token_t closing = not_provided(parser);
19507 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19508 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19509 // If we hit an embedded variable and the current node is a string
19510 // node, then we'll convert the current into an interpolated
19511 // string and add the string node to the list of parts.
19512 pm_token_t opening = not_provided(parser);
19513 pm_token_t closing = not_provided(parser);
19514 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19515
19516 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19517 pm_interpolated_symbol_node_append(interpolated, current);
19518 interpolated->base.location.start = current->location.start;
19519 start_location_set = true;
19520 current = UP(interpolated);
19521 } else {
19522 // If we hit an embedded variable and the current node is an
19523 // interpolated string, then we'll just add the embedded variable.
19524 }
19525
19526 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19527 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19528 if (!start_location_set) {
19529 current->location.start = part->location.start;
19530 }
19531 break;
19532 }
19533 case PM_TOKEN_EMBEXPR_BEGIN: {
19534 bool start_location_set = false;
19535 if (current == NULL) {
19536 // If we hit an embedded expression and the current node is NULL,
19537 // then this is the start of a new string. We'll set the current
19538 // node to a new interpolated string.
19539 pm_token_t opening = not_provided(parser);
19540 pm_token_t closing = not_provided(parser);
19541 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19542 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19543 // If we hit an embedded expression and the current node is a
19544 // string node, then we'll convert the current into an
19545 // interpolated string and add the string node to the list of
19546 // parts.
19547 pm_token_t opening = not_provided(parser);
19548 pm_token_t closing = not_provided(parser);
19549 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19550
19551 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19552 pm_interpolated_symbol_node_append(interpolated, current);
19553 interpolated->base.location.start = current->location.start;
19554 start_location_set = true;
19555 current = UP(interpolated);
19556 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19557 // If we hit an embedded expression and the current node is an
19558 // interpolated string, then we'll just continue on.
19559 } else {
19560 assert(false && "unreachable");
19561 }
19562
19563 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19564 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19565 if (!start_location_set) {
19566 current->location.start = part->location.start;
19567 }
19568 break;
19569 }
19570 default:
19571 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19572 parser_lex(parser);
19573 break;
19574 }
19575 }
19576
19577 // If we have a current node, then we need to append it to the list.
19578 if (current) {
19579 pm_array_node_elements_append(array, current);
19580 }
19581
19582 pm_token_t closing = parser->current;
19583 if (match1(parser, PM_TOKEN_EOF)) {
19584 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19585 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19586 } else {
19587 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19588 }
19589 pm_array_node_close_set(array, &closing);
19590
19591 return UP(array);
19592 }
19593 case PM_TOKEN_PERCENT_LOWER_W: {
19594 parser_lex(parser);
19595 pm_token_t opening = parser->previous;
19596 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19597 pm_node_t *current = NULL;
19598
19599 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19600 accept1(parser, PM_TOKEN_WORDS_SEP);
19601 if (match1(parser, PM_TOKEN_STRING_END)) break;
19602
19603 // Interpolation is not possible but nested heredocs can still lead to
19604 // consecutive (disjoint) string tokens when the final newline is escaped.
19605 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19606 pm_token_t opening = not_provided(parser);
19607 pm_token_t closing = not_provided(parser);
19608
19609 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19610
19611 // Record the string node, moving to interpolation if needed.
19612 if (current == NULL) {
19613 current = string;
19614 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19615 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19616 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19617 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19618 pm_interpolated_string_node_append(interpolated, current);
19619 pm_interpolated_string_node_append(interpolated, string);
19620 current = UP(interpolated);
19621 } else {
19622 assert(false && "unreachable");
19623 }
19624 parser_lex(parser);
19625 }
19626
19627 if (current) {
19628 pm_array_node_elements_append(array, current);
19629 current = NULL;
19630 } else {
19631 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19632 }
19633 }
19634
19635 pm_token_t closing = parser->current;
19636 if (match1(parser, PM_TOKEN_EOF)) {
19637 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19638 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19639 } else {
19640 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19641 }
19642
19643 pm_array_node_close_set(array, &closing);
19644 return UP(array);
19645 }
19646 case PM_TOKEN_PERCENT_UPPER_W: {
19647 parser_lex(parser);
19648 pm_token_t opening = parser->previous;
19649 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19650
19651 // This is the current node that we are parsing that will be added
19652 // to the list of elements.
19653 pm_node_t *current = NULL;
19654
19655 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19656 switch (parser->current.type) {
19657 case PM_TOKEN_WORDS_SEP: {
19658 // Reset the explicit encoding if we hit a separator
19659 // since each element can have its own encoding.
19660 parser->explicit_encoding = NULL;
19661
19662 if (current == NULL) {
19663 // If we hit a separator before we have any content,
19664 // then we don't need to do anything.
19665 } else {
19666 // If we hit a separator after we've hit content,
19667 // then we need to append that content to the list
19668 // and reset the current node.
19669 pm_array_node_elements_append(array, current);
19670 current = NULL;
19671 }
19672
19673 parser_lex(parser);
19674 break;
19675 }
19676 case PM_TOKEN_STRING_CONTENT: {
19677 pm_token_t opening = not_provided(parser);
19678 pm_token_t closing = not_provided(parser);
19679
19680 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19681 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19682 parser_lex(parser);
19683
19684 if (current == NULL) {
19685 // If we hit content and the current node is NULL,
19686 // then this is the first string content we've seen.
19687 // In that case we're going to create a new string
19688 // node and set that to the current.
19689 current = string;
19690 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19691 // If we hit string content and the current node is
19692 // an interpolated string, then we need to append
19693 // the string content to the list of child nodes.
19694 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19695 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19696 // If we hit string content and the current node is
19697 // a string node, then we need to convert the
19698 // current node into an interpolated string and add
19699 // the string content to the list of child nodes.
19700 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19701 pm_interpolated_string_node_append(interpolated, current);
19702 pm_interpolated_string_node_append(interpolated, string);
19703 current = UP(interpolated);
19704 } else {
19705 assert(false && "unreachable");
19706 }
19707
19708 break;
19709 }
19710 case PM_TOKEN_EMBVAR: {
19711 if (current == NULL) {
19712 // If we hit an embedded variable and the current
19713 // node is NULL, then this is the start of a new
19714 // string. We'll set the current node to a new
19715 // interpolated string.
19716 pm_token_t opening = not_provided(parser);
19717 pm_token_t closing = not_provided(parser);
19718 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19719 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19720 // If we hit an embedded variable and the current
19721 // node is a string node, then we'll convert the
19722 // current into an interpolated string and add the
19723 // string node to the list of parts.
19724 pm_token_t opening = not_provided(parser);
19725 pm_token_t closing = not_provided(parser);
19726 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19727 pm_interpolated_string_node_append(interpolated, current);
19728 current = UP(interpolated);
19729 } else {
19730 // If we hit an embedded variable and the current
19731 // node is an interpolated string, then we'll just
19732 // add the embedded variable.
19733 }
19734
19735 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19736 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19737 break;
19738 }
19739 case PM_TOKEN_EMBEXPR_BEGIN: {
19740 if (current == NULL) {
19741 // If we hit an embedded expression and the current
19742 // node is NULL, then this is the start of a new
19743 // string. We'll set the current node to a new
19744 // interpolated string.
19745 pm_token_t opening = not_provided(parser);
19746 pm_token_t closing = not_provided(parser);
19747 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19748 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19749 // If we hit an embedded expression and the current
19750 // node is a string node, then we'll convert the
19751 // current into an interpolated string and add the
19752 // string node to the list of parts.
19753 pm_token_t opening = not_provided(parser);
19754 pm_token_t closing = not_provided(parser);
19755 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19756 pm_interpolated_string_node_append(interpolated, current);
19757 current = UP(interpolated);
19758 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19759 // If we hit an embedded expression and the current
19760 // node is an interpolated string, then we'll just
19761 // continue on.
19762 } else {
19763 assert(false && "unreachable");
19764 }
19765
19766 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19767 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19768 break;
19769 }
19770 default:
19771 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19772 parser_lex(parser);
19773 break;
19774 }
19775 }
19776
19777 // If we have a current node, then we need to append it to the list.
19778 if (current) {
19779 pm_array_node_elements_append(array, current);
19780 }
19781
19782 pm_token_t closing = parser->current;
19783 if (match1(parser, PM_TOKEN_EOF)) {
19784 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19785 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19786 } else {
19787 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19788 }
19789
19790 pm_array_node_close_set(array, &closing);
19791 return UP(array);
19792 }
19793 case PM_TOKEN_REGEXP_BEGIN: {
19794 pm_token_t opening = parser->current;
19795 parser_lex(parser);
19796
19797 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19798 // If we get here, then we have an end immediately after a start. In
19799 // that case we'll create an empty content token and return an
19800 // uninterpolated regular expression.
19801 pm_token_t content = (pm_token_t) {
19802 .type = PM_TOKEN_STRING_CONTENT,
19803 .start = parser->previous.end,
19804 .end = parser->previous.end
19805 };
19806
19807 parser_lex(parser);
19808
19809 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19810 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19811
19812 return node;
19813 }
19814
19816
19817 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19818 // In this case we've hit string content so we know the regular
19819 // expression at least has something in it. We'll need to check if the
19820 // following token is the end (in which case we can return a plain
19821 // regular expression) or if it's not then it has interpolation.
19822 pm_string_t unescaped = parser->current_string;
19823 pm_token_t content = parser->current;
19824 bool ascii_only = parser->current_regular_expression_ascii_only;
19825 parser_lex(parser);
19826
19827 // If we hit an end, then we can create a regular expression
19828 // node without interpolation, which can be represented more
19829 // succinctly and more easily compiled.
19830 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19831 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19832
19833 // If we're not immediately followed by a =~, then we want
19834 // to parse all of the errors at this point. If it is
19835 // followed by a =~, then it will get parsed higher up while
19836 // parsing the named captures as well.
19837 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19838 parse_regular_expression_errors(parser, node);
19839 }
19840
19841 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19842 return UP(node);
19843 }
19844
19845 // If we get here, then we have interpolation so we'll need to create
19846 // a regular expression node with interpolation.
19847 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19848
19849 pm_token_t opening = not_provided(parser);
19850 pm_token_t closing = not_provided(parser);
19851 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19852
19853 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19854 // This is extremely strange, but the first string part of a
19855 // regular expression will always be tagged as binary if we
19856 // are in a US-ASCII file, no matter its contents.
19857 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19858 }
19859
19860 pm_interpolated_regular_expression_node_append(interpolated, part);
19861 } else {
19862 // If the first part of the body of the regular expression is not a
19863 // string content, then we have interpolation and we need to create an
19864 // interpolated regular expression node.
19865 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19866 }
19867
19868 // Now that we're here and we have interpolation, we'll parse all of the
19869 // parts into the list.
19870 pm_node_t *part;
19871 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19872 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19873 pm_interpolated_regular_expression_node_append(interpolated, part);
19874 }
19875 }
19876
19877 pm_token_t closing = parser->current;
19878 if (match1(parser, PM_TOKEN_EOF)) {
19879 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19880 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19881 } else {
19882 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19883 }
19884
19885 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19886 return UP(interpolated);
19887 }
19888 case PM_TOKEN_BACKTICK:
19889 case PM_TOKEN_PERCENT_LOWER_X: {
19890 parser_lex(parser);
19891 pm_token_t opening = parser->previous;
19892
19893 // When we get here, we don't know if this string is going to have
19894 // interpolation or not, even though it is allowed. Still, we want to be
19895 // able to return a string node without interpolation if we can since
19896 // it'll be faster.
19897 if (match1(parser, PM_TOKEN_STRING_END)) {
19898 // If we get here, then we have an end immediately after a start. In
19899 // that case we'll create an empty content token and return an
19900 // uninterpolated string.
19901 pm_token_t content = (pm_token_t) {
19902 .type = PM_TOKEN_STRING_CONTENT,
19903 .start = parser->previous.end,
19904 .end = parser->previous.end
19905 };
19906
19907 parser_lex(parser);
19908 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19909 }
19910
19912
19913 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19914 // In this case we've hit string content so we know the string
19915 // at least has something in it. We'll need to check if the
19916 // following token is the end (in which case we can return a
19917 // plain string) or if it's not then it has interpolation.
19918 pm_string_t unescaped = parser->current_string;
19919 pm_token_t content = parser->current;
19920 parser_lex(parser);
19921
19922 if (match1(parser, PM_TOKEN_STRING_END)) {
19923 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19924 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19925 parser_lex(parser);
19926 return node;
19927 }
19928
19929 // If we get here, then we have interpolation so we'll need to
19930 // create a string node with interpolation.
19931 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19932
19933 pm_token_t opening = not_provided(parser);
19934 pm_token_t closing = not_provided(parser);
19935
19936 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19937 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19938
19939 pm_interpolated_xstring_node_append(node, part);
19940 } else {
19941 // If the first part of the body of the string is not a string
19942 // content, then we have interpolation and we need to create an
19943 // interpolated string node.
19944 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19945 }
19946
19947 pm_node_t *part;
19948 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19949 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19950 pm_interpolated_xstring_node_append(node, part);
19951 }
19952 }
19953
19954 pm_token_t closing = parser->current;
19955 if (match1(parser, PM_TOKEN_EOF)) {
19956 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19957 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19958 } else {
19959 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19960 }
19961 pm_interpolated_xstring_node_closing_set(node, &closing);
19962
19963 return UP(node);
19964 }
19965 case PM_TOKEN_USTAR: {
19966 parser_lex(parser);
19967
19968 // * operators at the beginning of expressions are only valid in the
19969 // context of a multiple assignment. We enforce that here. We'll
19970 // still lex past it though and create a missing node place.
19971 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19972 pm_parser_err_prefix(parser, diag_id);
19973 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
19974 }
19975
19976 pm_token_t operator = parser->previous;
19977 pm_node_t *name = NULL;
19978
19979 if (token_begins_expression_p(parser->current.type)) {
19980 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19981 }
19982
19983 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
19984
19985 if (match1(parser, PM_TOKEN_COMMA)) {
19986 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19987 } else {
19988 return parse_target_validate(parser, splat, true);
19989 }
19990 }
19991 case PM_TOKEN_BANG: {
19992 if (binding_power > PM_BINDING_POWER_UNARY) {
19993 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19994 }
19995
19996 parser_lex(parser);
19997
19998 pm_token_t operator = parser->previous;
19999 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20000 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20001
20002 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20003 return UP(node);
20004 }
20005 case PM_TOKEN_TILDE: {
20006 if (binding_power > PM_BINDING_POWER_UNARY) {
20007 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20008 }
20009 parser_lex(parser);
20010
20011 pm_token_t operator = parser->previous;
20012 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20013 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20014
20015 return UP(node);
20016 }
20017 case PM_TOKEN_UMINUS: {
20018 if (binding_power > PM_BINDING_POWER_UNARY) {
20019 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20020 }
20021 parser_lex(parser);
20022
20023 pm_token_t operator = parser->previous;
20024 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20025 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20026
20027 return UP(node);
20028 }
20029 case PM_TOKEN_UMINUS_NUM: {
20030 parser_lex(parser);
20031
20032 pm_token_t operator = parser->previous;
20033 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20034
20035 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20036 pm_token_t exponent_operator = parser->previous;
20037 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20038 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20039 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20040 } else {
20041 switch (PM_NODE_TYPE(node)) {
20042 case PM_INTEGER_NODE:
20043 case PM_FLOAT_NODE:
20044 case PM_RATIONAL_NODE:
20045 case PM_IMAGINARY_NODE:
20046 parse_negative_numeric(node);
20047 break;
20048 default:
20049 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20050 break;
20051 }
20052 }
20053
20054 return node;
20055 }
20056 case PM_TOKEN_MINUS_GREATER: {
20057 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20059
20060 size_t opening_newline_index = token_newline_index(parser);
20061 pm_accepts_block_stack_push(parser, true);
20062 parser_lex(parser);
20063
20064 pm_token_t operator = parser->previous;
20065 pm_parser_scope_push(parser, false);
20066
20067 pm_block_parameters_node_t *block_parameters;
20068
20069 switch (parser->current.type) {
20070 case PM_TOKEN_PARENTHESIS_LEFT: {
20071 pm_token_t opening = parser->current;
20072 parser_lex(parser);
20073
20074 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20075 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20076 } else {
20077 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20078 }
20079
20080 accept1(parser, PM_TOKEN_NEWLINE);
20081 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20082
20083 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20084 break;
20085 }
20086 case PM_CASE_PARAMETER: {
20087 pm_accepts_block_stack_push(parser, false);
20088 pm_token_t opening = not_provided(parser);
20089 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20090 pm_accepts_block_stack_pop(parser);
20091 break;
20092 }
20093 default: {
20094 block_parameters = NULL;
20095 break;
20096 }
20097 }
20098
20099 pm_token_t opening;
20100 pm_node_t *body = NULL;
20101 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20102
20103 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20104 opening = parser->previous;
20105
20106 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20107 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20108 }
20109
20110 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20111 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20112 } else {
20113 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20114 opening = parser->previous;
20115
20116 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20117 pm_accepts_block_stack_push(parser, true);
20118 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20119 pm_accepts_block_stack_pop(parser);
20120 }
20121
20122 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20123 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20124 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20125 } else {
20126 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20127 }
20128
20129 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20130 }
20131
20132 pm_constant_id_list_t locals;
20133 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20134 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20135
20136 pm_parser_scope_pop(parser);
20137 pm_accepts_block_stack_pop(parser);
20138
20139 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20140 }
20141 case PM_TOKEN_UPLUS: {
20142 if (binding_power > PM_BINDING_POWER_UNARY) {
20143 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20144 }
20145 parser_lex(parser);
20146
20147 pm_token_t operator = parser->previous;
20148 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20149 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20150
20151 return UP(node);
20152 }
20153 case PM_TOKEN_STRING_BEGIN:
20154 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20155 case PM_TOKEN_SYMBOL_BEGIN: {
20156 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20157 parser_lex(parser);
20158
20159 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20160 }
20161 default: {
20162 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20163
20164 if (recoverable != PM_CONTEXT_NONE) {
20165 parser->recovering = true;
20166
20167 // If the given error is not the generic one, then we'll add it
20168 // here because it will provide more context in addition to the
20169 // recoverable error that we will also add.
20170 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20171 pm_parser_err_prefix(parser, diag_id);
20172 }
20173
20174 // If we get here, then we are assuming this token is closing a
20175 // parent context, so we'll indicate that to the user so that
20176 // they know how we behaved.
20177 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20178 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20179 // We're going to make a special case here, because "cannot
20180 // parse expression" is pretty generic, and we know here that we
20181 // have an unexpected token.
20182 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20183 } else {
20184 pm_parser_err_prefix(parser, diag_id);
20185 }
20186
20187 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
20188 }
20189 }
20190}
20191
20201static pm_node_t *
20202parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20203 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20204
20205 // Contradicting binding powers, the right-hand-side value of the assignment
20206 // allows the `rescue` modifier.
20207 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20208 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20209
20210 pm_token_t rescue = parser->current;
20211 parser_lex(parser);
20212
20213 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20214 context_pop(parser);
20215
20216 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20217 }
20218
20219 return value;
20220}
20221
20226static void
20227parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20228 switch (PM_NODE_TYPE(node)) {
20229 case PM_BEGIN_NODE: {
20230 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20231 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20232 break;
20233 }
20234 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20236 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20237 break;
20238 }
20239 case PM_PARENTHESES_NODE: {
20240 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20241 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20242 break;
20243 }
20244 case PM_STATEMENTS_NODE: {
20245 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20246 const pm_node_t *statement;
20247
20248 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20249 parse_assignment_value_local(parser, statement);
20250 }
20251 break;
20252 }
20253 default:
20254 break;
20255 }
20256}
20257
20270static pm_node_t *
20271parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20272 bool permitted = true;
20273 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20274
20275 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20276 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20277
20278 parse_assignment_value_local(parser, value);
20279 bool single_value = true;
20280
20281 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20282 single_value = false;
20283
20284 pm_token_t opening = not_provided(parser);
20285 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20286
20287 pm_array_node_elements_append(array, value);
20288 value = UP(array);
20289
20290 while (accept1(parser, PM_TOKEN_COMMA)) {
20291 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20292
20293 pm_array_node_elements_append(array, element);
20294 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20295
20296 parse_assignment_value_local(parser, element);
20297 }
20298 }
20299
20300 // Contradicting binding powers, the right-hand-side value of the assignment
20301 // allows the `rescue` modifier.
20302 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20303 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20304
20305 pm_token_t rescue = parser->current;
20306 parser_lex(parser);
20307
20308 bool accepts_command_call_inner = false;
20309
20310 // RHS can accept command call iff the value is a call with arguments
20311 // but without parenthesis.
20312 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20313 pm_call_node_t *call_node = (pm_call_node_t *) value;
20314 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20315 accepts_command_call_inner = true;
20316 }
20317 }
20318
20319 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20320 context_pop(parser);
20321
20322 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20323 }
20324
20325 return value;
20326}
20327
20335static void
20336parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20337 if (call_node->arguments != NULL) {
20338 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20339 pm_node_unreference(parser, UP(call_node->arguments));
20340 pm_node_destroy(parser, UP(call_node->arguments));
20341 call_node->arguments = NULL;
20342 }
20343
20344 if (call_node->block != NULL) {
20345 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20346 pm_node_unreference(parser, UP(call_node->block));
20347 pm_node_destroy(parser, UP(call_node->block));
20348 call_node->block = NULL;
20349 }
20350}
20351
20376
20377static inline const uint8_t *
20378pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20379 cursor++;
20380
20381 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20382 uint8_t value = escape_hexadecimal_digit(*cursor);
20383 cursor++;
20384
20385 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20386 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20387 cursor++;
20388 }
20389
20390 pm_buffer_append_byte(unescaped, value);
20391 } else {
20392 pm_buffer_append_string(unescaped, "\\x", 2);
20393 }
20394
20395 return cursor;
20396}
20397
20398static inline const uint8_t *
20399pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20400 uint8_t value = (uint8_t) (*cursor - '0');
20401 cursor++;
20402
20403 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20404 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20405 cursor++;
20406
20407 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20408 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20409 cursor++;
20410 }
20411 }
20412
20413 pm_buffer_append_byte(unescaped, value);
20414 return cursor;
20415}
20416
20417static inline const uint8_t *
20418pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20419 const uint8_t *start = cursor - 1;
20420 cursor++;
20421
20422 if (cursor >= end) {
20423 pm_buffer_append_string(unescaped, "\\u", 2);
20424 return cursor;
20425 }
20426
20427 if (*cursor != '{') {
20428 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20429 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20430
20431 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20432 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20433 }
20434
20435 return cursor + length;
20436 }
20437
20438 cursor++;
20439 for (;;) {
20440 while (cursor < end && *cursor == ' ') cursor++;
20441
20442 if (cursor >= end) break;
20443 if (*cursor == '}') {
20444 cursor++;
20445 break;
20446 }
20447
20448 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20449 if (length == 0) {
20450 break;
20451 }
20452 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20453
20454 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20455 cursor += length;
20456 }
20457
20458 return cursor;
20459}
20460
20461static void
20462pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20463 const uint8_t *end = source + length;
20464 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20465
20466 for (;;) {
20467 if (++cursor >= end) {
20468 pm_buffer_append_byte(unescaped, '\\');
20469 return;
20470 }
20471
20472 switch (*cursor) {
20473 case 'x':
20474 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20475 break;
20476 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20477 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20478 break;
20479 case 'u':
20480 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20481 break;
20482 default:
20483 pm_buffer_append_byte(unescaped, '\\');
20484 break;
20485 }
20486
20487 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20488 if (next_cursor == NULL) break;
20489
20490 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20491 cursor = next_cursor;
20492 }
20493
20494 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20495}
20496
20501static void
20502parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20504
20505 pm_parser_t *parser = callback_data->parser;
20506 pm_call_node_t *call = callback_data->call;
20507 pm_constant_id_list_t *names = &callback_data->names;
20508
20509 const uint8_t *source = pm_string_source(capture);
20510 size_t length = pm_string_length(capture);
20511 pm_buffer_t unescaped = { 0 };
20512
20513 // First, we need to handle escapes within the name of the capture group.
20514 // This is because regular expressions have three different representations
20515 // in prism. The first is the plain source code. The second is the
20516 // representation that will be sent to the regular expression engine, which
20517 // is the value of the "unescaped" field. This is poorly named, because it
20518 // actually still contains escapes, just a subset of them that the regular
20519 // expression engine knows how to handle. The third representation is fully
20520 // unescaped, which is what we need.
20521 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20522 if (PRISM_UNLIKELY(cursor != NULL)) {
20523 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
20524 source = (const uint8_t *) pm_buffer_value(&unescaped);
20525 length = pm_buffer_length(&unescaped);
20526 }
20527
20528 pm_location_t location;
20529 pm_constant_id_t name;
20530
20531 // If the name of the capture group isn't a valid identifier, we do
20532 // not add it to the local table.
20533 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20534 pm_buffer_free(&unescaped);
20535 return;
20536 }
20537
20538 if (callback_data->shared) {
20539 // If the unescaped string is a slice of the source, then we can
20540 // copy the names directly. The pointers will line up.
20541 location = (pm_location_t) { .start = source, .end = source + length };
20542 name = pm_parser_constant_id_location(parser, location.start, location.end);
20543 } else {
20544 // Otherwise, the name is a slice of the malloc-ed owned string,
20545 // in which case we need to copy it out into a new string.
20546 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20547
20548 void *memory = xmalloc(length);
20549 if (memory == NULL) abort();
20550
20551 memcpy(memory, source, length);
20552 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20553 }
20554
20555 // Add this name to the list of constants if it is valid, not duplicated,
20556 // and not a keyword.
20557 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20558 pm_constant_id_list_append(names, name);
20559
20560 int depth;
20561 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20562 // If the local is not already a local but it is a keyword, then we
20563 // do not want to add a capture for this.
20564 if (pm_local_is_keyword((const char *) source, length)) {
20565 pm_buffer_free(&unescaped);
20566 return;
20567 }
20568
20569 // If the identifier is not already a local, then we will add it to
20570 // the local table.
20571 pm_parser_local_add(parser, name, location.start, location.end, 0);
20572 }
20573
20574 // Here we lazily create the MatchWriteNode since we know we're
20575 // about to add a target.
20576 if (callback_data->match == NULL) {
20577 callback_data->match = pm_match_write_node_create(parser, call);
20578 }
20579
20580 // Next, create the local variable target and add it to the list of
20581 // targets for the match.
20582 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth));
20583 pm_node_list_append(&callback_data->match->targets, target);
20584 }
20585
20586 pm_buffer_free(&unescaped);
20587}
20588
20593static pm_node_t *
20594parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20596 .parser = parser,
20597 .call = call,
20598 .names = { 0 },
20599 .shared = content->type == PM_STRING_SHARED
20600 };
20601
20603 .parser = parser,
20604 .start = call->receiver->location.start,
20605 .end = call->receiver->location.end,
20606 .shared = content->type == PM_STRING_SHARED
20607 };
20608
20609 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20610 pm_constant_id_list_free(&callback_data.names);
20611
20612 if (callback_data.match != NULL) {
20613 return UP(callback_data.match);
20614 } else {
20615 return UP(call);
20616 }
20617}
20618
20619static inline pm_node_t *
20620parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20621 pm_token_t token = parser->current;
20622
20623 switch (token.type) {
20624 case PM_TOKEN_EQUAL: {
20625 switch (PM_NODE_TYPE(node)) {
20626 case PM_CALL_NODE: {
20627 // If we have no arguments to the call node and we need this
20628 // to be a target then this is either a method call or a
20629 // local variable write. This _must_ happen before the value
20630 // is parsed because it could be referenced in the value.
20631 pm_call_node_t *call_node = (pm_call_node_t *) node;
20632 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20633 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20634 }
20635 }
20637 case PM_CASE_WRITABLE: {
20638 // When we have `it = value`, we need to add `it` as a local
20639 // variable before parsing the value, in case the value
20640 // references the variable.
20641 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20642 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
20643 }
20644
20645 parser_lex(parser);
20646 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20647
20648 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20649 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20650 }
20651
20652 return parse_write(parser, node, &token, value);
20653 }
20654 case PM_SPLAT_NODE: {
20655 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20656 pm_multi_target_node_targets_append(parser, multi_target, node);
20657
20658 parser_lex(parser);
20659 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20660 return parse_write(parser, UP(multi_target), &token, value);
20661 }
20662 case PM_SOURCE_ENCODING_NODE:
20663 case PM_FALSE_NODE:
20664 case PM_SOURCE_FILE_NODE:
20665 case PM_SOURCE_LINE_NODE:
20666 case PM_NIL_NODE:
20667 case PM_SELF_NODE:
20668 case PM_TRUE_NODE: {
20669 // In these special cases, we have specific error messages
20670 // and we will replace them with local variable writes.
20671 parser_lex(parser);
20672 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20673 return parse_unwriteable_write(parser, node, &token, value);
20674 }
20675 default:
20676 // In this case we have an = sign, but we don't know what
20677 // it's for. We need to treat it as an error. We'll mark it
20678 // as an error and skip past it.
20679 parser_lex(parser);
20680 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20681 return node;
20682 }
20683 }
20684 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20685 switch (PM_NODE_TYPE(node)) {
20686 case PM_BACK_REFERENCE_READ_NODE:
20687 case PM_NUMBERED_REFERENCE_READ_NODE:
20688 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20690 case PM_GLOBAL_VARIABLE_READ_NODE: {
20691 parser_lex(parser);
20692
20693 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20694 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20695
20696 pm_node_destroy(parser, node);
20697 return result;
20698 }
20699 case PM_CLASS_VARIABLE_READ_NODE: {
20700 parser_lex(parser);
20701
20702 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20703 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20704
20705 pm_node_destroy(parser, node);
20706 return result;
20707 }
20708 case PM_CONSTANT_PATH_NODE: {
20709 parser_lex(parser);
20710
20711 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20712 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20713
20714 return parse_shareable_constant_write(parser, write);
20715 }
20716 case PM_CONSTANT_READ_NODE: {
20717 parser_lex(parser);
20718
20719 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20720 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20721
20722 pm_node_destroy(parser, node);
20723 return parse_shareable_constant_write(parser, write);
20724 }
20725 case PM_INSTANCE_VARIABLE_READ_NODE: {
20726 parser_lex(parser);
20727
20728 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20729 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20730
20731 pm_node_destroy(parser, node);
20732 return result;
20733 }
20734 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20735 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20736 parser_lex(parser);
20737
20738 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20739 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20740
20741 pm_node_unreference(parser, node);
20742 pm_node_destroy(parser, node);
20743 return result;
20744 }
20745 case PM_LOCAL_VARIABLE_READ_NODE: {
20746 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20747 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20748 pm_node_unreference(parser, node);
20749 }
20750
20752 parser_lex(parser);
20753
20754 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20755 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20756
20757 pm_node_destroy(parser, node);
20758 return result;
20759 }
20760 case PM_CALL_NODE: {
20761 pm_call_node_t *cast = (pm_call_node_t *) node;
20762
20763 // If we have a vcall (a method with no arguments and no
20764 // receiver that could have been a local variable) then we
20765 // will transform it into a local variable write.
20766 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20767 pm_location_t *message_loc = &cast->message_loc;
20768 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20769
20770 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20771 parser_lex(parser);
20772
20773 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20774 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20775
20776 pm_node_destroy(parser, UP(cast));
20777 return result;
20778 }
20779
20780 // Move past the token here so that we have already added
20781 // the local variable by this point.
20782 parser_lex(parser);
20783
20784 // If there is no call operator and the message is "[]" then
20785 // this is an aref expression, and we can transform it into
20786 // an aset expression.
20787 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20788 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20789 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20790 }
20791
20792 // If this node cannot be writable, then we have an error.
20793 if (pm_call_node_writable_p(parser, cast)) {
20794 parse_write_name(parser, &cast->name);
20795 } else {
20796 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20797 }
20798
20799 parse_call_operator_write(parser, cast, &token);
20800 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20801 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20802 }
20803 case PM_MULTI_WRITE_NODE: {
20804 parser_lex(parser);
20805 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20806 return node;
20807 }
20808 default:
20809 parser_lex(parser);
20810
20811 // In this case we have an &&= sign, but we don't know what it's for.
20812 // We need to treat it as an error. For now, we'll mark it as an error
20813 // and just skip right past it.
20814 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20815 return node;
20816 }
20817 }
20818 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20819 switch (PM_NODE_TYPE(node)) {
20820 case PM_BACK_REFERENCE_READ_NODE:
20821 case PM_NUMBERED_REFERENCE_READ_NODE:
20822 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20824 case PM_GLOBAL_VARIABLE_READ_NODE: {
20825 parser_lex(parser);
20826
20827 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20828 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20829
20830 pm_node_destroy(parser, node);
20831 return result;
20832 }
20833 case PM_CLASS_VARIABLE_READ_NODE: {
20834 parser_lex(parser);
20835
20836 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20837 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20838
20839 pm_node_destroy(parser, node);
20840 return result;
20841 }
20842 case PM_CONSTANT_PATH_NODE: {
20843 parser_lex(parser);
20844
20845 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20846 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20847
20848 return parse_shareable_constant_write(parser, write);
20849 }
20850 case PM_CONSTANT_READ_NODE: {
20851 parser_lex(parser);
20852
20853 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20854 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20855
20856 pm_node_destroy(parser, node);
20857 return parse_shareable_constant_write(parser, write);
20858 }
20859 case PM_INSTANCE_VARIABLE_READ_NODE: {
20860 parser_lex(parser);
20861
20862 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20863 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20864
20865 pm_node_destroy(parser, node);
20866 return result;
20867 }
20868 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20869 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20870 parser_lex(parser);
20871
20872 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20873 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20874
20875 pm_node_unreference(parser, node);
20876 pm_node_destroy(parser, node);
20877 return result;
20878 }
20879 case PM_LOCAL_VARIABLE_READ_NODE: {
20880 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20881 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20882 pm_node_unreference(parser, node);
20883 }
20884
20886 parser_lex(parser);
20887
20888 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20889 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20890
20891 pm_node_destroy(parser, node);
20892 return result;
20893 }
20894 case PM_CALL_NODE: {
20895 pm_call_node_t *cast = (pm_call_node_t *) node;
20896
20897 // If we have a vcall (a method with no arguments and no
20898 // receiver that could have been a local variable) then we
20899 // will transform it into a local variable write.
20900 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20901 pm_location_t *message_loc = &cast->message_loc;
20902 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20903
20904 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20905 parser_lex(parser);
20906
20907 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20908 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20909
20910 pm_node_destroy(parser, UP(cast));
20911 return result;
20912 }
20913
20914 // Move past the token here so that we have already added
20915 // the local variable by this point.
20916 parser_lex(parser);
20917
20918 // If there is no call operator and the message is "[]" then
20919 // this is an aref expression, and we can transform it into
20920 // an aset expression.
20921 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20922 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20923 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20924 }
20925
20926 // If this node cannot be writable, then we have an error.
20927 if (pm_call_node_writable_p(parser, cast)) {
20928 parse_write_name(parser, &cast->name);
20929 } else {
20930 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20931 }
20932
20933 parse_call_operator_write(parser, cast, &token);
20934 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20935 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20936 }
20937 case PM_MULTI_WRITE_NODE: {
20938 parser_lex(parser);
20939 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20940 return node;
20941 }
20942 default:
20943 parser_lex(parser);
20944
20945 // In this case we have an ||= sign, but we don't know what it's for.
20946 // We need to treat it as an error. For now, we'll mark it as an error
20947 // and just skip right past it.
20948 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20949 return node;
20950 }
20951 }
20952 case PM_TOKEN_AMPERSAND_EQUAL:
20953 case PM_TOKEN_CARET_EQUAL:
20954 case PM_TOKEN_GREATER_GREATER_EQUAL:
20955 case PM_TOKEN_LESS_LESS_EQUAL:
20956 case PM_TOKEN_MINUS_EQUAL:
20957 case PM_TOKEN_PERCENT_EQUAL:
20958 case PM_TOKEN_PIPE_EQUAL:
20959 case PM_TOKEN_PLUS_EQUAL:
20960 case PM_TOKEN_SLASH_EQUAL:
20961 case PM_TOKEN_STAR_EQUAL:
20962 case PM_TOKEN_STAR_STAR_EQUAL: {
20963 switch (PM_NODE_TYPE(node)) {
20964 case PM_BACK_REFERENCE_READ_NODE:
20965 case PM_NUMBERED_REFERENCE_READ_NODE:
20966 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20968 case PM_GLOBAL_VARIABLE_READ_NODE: {
20969 parser_lex(parser);
20970
20971 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20972 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20973
20974 pm_node_destroy(parser, node);
20975 return result;
20976 }
20977 case PM_CLASS_VARIABLE_READ_NODE: {
20978 parser_lex(parser);
20979
20980 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20981 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20982
20983 pm_node_destroy(parser, node);
20984 return result;
20985 }
20986 case PM_CONSTANT_PATH_NODE: {
20987 parser_lex(parser);
20988
20989 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20990 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20991
20992 return parse_shareable_constant_write(parser, write);
20993 }
20994 case PM_CONSTANT_READ_NODE: {
20995 parser_lex(parser);
20996
20997 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20998 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20999
21000 pm_node_destroy(parser, node);
21001 return parse_shareable_constant_write(parser, write);
21002 }
21003 case PM_INSTANCE_VARIABLE_READ_NODE: {
21004 parser_lex(parser);
21005
21006 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21007 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21008
21009 pm_node_destroy(parser, node);
21010 return result;
21011 }
21012 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21013 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21014 parser_lex(parser);
21015
21016 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21017 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21018
21019 pm_node_unreference(parser, node);
21020 pm_node_destroy(parser, node);
21021 return result;
21022 }
21023 case PM_LOCAL_VARIABLE_READ_NODE: {
21024 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21025 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21026 pm_node_unreference(parser, node);
21027 }
21028
21030 parser_lex(parser);
21031
21032 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21033 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21034
21035 pm_node_destroy(parser, node);
21036 return result;
21037 }
21038 case PM_CALL_NODE: {
21039 parser_lex(parser);
21040 pm_call_node_t *cast = (pm_call_node_t *) node;
21041
21042 // If we have a vcall (a method with no arguments and no
21043 // receiver that could have been a local variable) then we
21044 // will transform it into a local variable write.
21045 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21046 pm_location_t *message_loc = &cast->message_loc;
21047 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21048
21049 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21050 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21051 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21052
21053 pm_node_destroy(parser, UP(cast));
21054 return result;
21055 }
21056
21057 // If there is no call operator and the message is "[]" then
21058 // this is an aref expression, and we can transform it into
21059 // an aset expression.
21060 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21061 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21062 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21063 }
21064
21065 // If this node cannot be writable, then we have an error.
21066 if (pm_call_node_writable_p(parser, cast)) {
21067 parse_write_name(parser, &cast->name);
21068 } else {
21069 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21070 }
21071
21072 parse_call_operator_write(parser, cast, &token);
21073 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21074 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21075 }
21076 case PM_MULTI_WRITE_NODE: {
21077 parser_lex(parser);
21078 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21079 return node;
21080 }
21081 default:
21082 parser_lex(parser);
21083
21084 // In this case we have an operator but we don't know what it's for.
21085 // We need to treat it as an error. For now, we'll mark it as an error
21086 // and just skip right past it.
21087 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21088 return node;
21089 }
21090 }
21091 case PM_TOKEN_AMPERSAND_AMPERSAND:
21092 case PM_TOKEN_KEYWORD_AND: {
21093 parser_lex(parser);
21094
21095 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21096 return UP(pm_and_node_create(parser, node, &token, right));
21097 }
21098 case PM_TOKEN_KEYWORD_OR:
21099 case PM_TOKEN_PIPE_PIPE: {
21100 parser_lex(parser);
21101
21102 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21103 return UP(pm_or_node_create(parser, node, &token, right));
21104 }
21105 case PM_TOKEN_EQUAL_TILDE: {
21106 // Note that we _must_ parse the value before adding the local
21107 // variables in order to properly mirror the behavior of Ruby. For
21108 // example,
21109 //
21110 // /(?<foo>bar)/ =~ foo
21111 //
21112 // In this case, `foo` should be a method call and not a local yet.
21113 parser_lex(parser);
21114 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21115
21116 // By default, we're going to create a call node and then return it.
21117 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21118 pm_node_t *result = UP(call);
21119
21120 // If the receiver of this =~ is a regular expression node, then we
21121 // need to introduce local variables for it based on its named
21122 // capture groups.
21123 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21124 // It's possible to have an interpolated regular expression node
21125 // that only contains strings. This is because it can be split
21126 // up by a heredoc. In this case we need to concat the unescaped
21127 // strings together and then parse them as a regular expression.
21129
21130 bool interpolated = false;
21131 size_t total_length = 0;
21132
21133 pm_node_t *part;
21134 PM_NODE_LIST_FOREACH(parts, index, part) {
21135 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21136 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21137 } else {
21138 interpolated = true;
21139 break;
21140 }
21141 }
21142
21143 if (!interpolated && total_length > 0) {
21144 void *memory = xmalloc(total_length);
21145 if (!memory) abort();
21146
21147 uint8_t *cursor = memory;
21148 PM_NODE_LIST_FOREACH(parts, index, part) {
21149 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21150 size_t length = pm_string_length(unescaped);
21151
21152 memcpy(cursor, pm_string_source(unescaped), length);
21153 cursor += length;
21154 }
21155
21156 pm_string_t owned;
21157 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21158
21159 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21160 pm_string_free(&owned);
21161 }
21162 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21163 // If we have a regular expression node, then we can just parse
21164 // the named captures directly off the unescaped string.
21165 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21166 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21167 }
21168
21169 return result;
21170 }
21171 case PM_TOKEN_UAMPERSAND:
21172 case PM_TOKEN_USTAR:
21173 case PM_TOKEN_USTAR_STAR:
21174 // The only times this will occur are when we are in an error state,
21175 // but we'll put them in here so that errors can propagate.
21176 case PM_TOKEN_BANG_EQUAL:
21177 case PM_TOKEN_BANG_TILDE:
21178 case PM_TOKEN_EQUAL_EQUAL:
21179 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21180 case PM_TOKEN_LESS_EQUAL_GREATER:
21181 case PM_TOKEN_CARET:
21182 case PM_TOKEN_PIPE:
21183 case PM_TOKEN_AMPERSAND:
21184 case PM_TOKEN_GREATER_GREATER:
21185 case PM_TOKEN_LESS_LESS:
21186 case PM_TOKEN_MINUS:
21187 case PM_TOKEN_PLUS:
21188 case PM_TOKEN_PERCENT:
21189 case PM_TOKEN_SLASH:
21190 case PM_TOKEN_STAR:
21191 case PM_TOKEN_STAR_STAR: {
21192 parser_lex(parser);
21193 pm_token_t operator = parser->previous;
21194 switch (PM_NODE_TYPE(node)) {
21195 case PM_RESCUE_MODIFIER_NODE: {
21197 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21198 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21199 }
21200 break;
21201 }
21202 case PM_AND_NODE: {
21203 pm_and_node_t *cast = (pm_and_node_t *) node;
21204 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21205 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21206 }
21207 break;
21208 }
21209 case PM_OR_NODE: {
21210 pm_or_node_t *cast = (pm_or_node_t *) node;
21211 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21212 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21213 }
21214 break;
21215 }
21216 default:
21217 break;
21218 }
21219
21220 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21221 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21222 }
21223 case PM_TOKEN_GREATER:
21224 case PM_TOKEN_GREATER_EQUAL:
21225 case PM_TOKEN_LESS:
21226 case PM_TOKEN_LESS_EQUAL: {
21227 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21228 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21229 }
21230
21231 parser_lex(parser);
21232 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21233 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21234 }
21235 case PM_TOKEN_AMPERSAND_DOT:
21236 case PM_TOKEN_DOT: {
21237 parser_lex(parser);
21238 pm_token_t operator = parser->previous;
21239 pm_arguments_t arguments = { 0 };
21240
21241 // This if statement handles the foo.() syntax.
21242 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21243 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21244 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21245 }
21246
21247 switch (PM_NODE_TYPE(node)) {
21248 case PM_RESCUE_MODIFIER_NODE: {
21250 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21251 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21252 }
21253 break;
21254 }
21255 case PM_AND_NODE: {
21256 pm_and_node_t *cast = (pm_and_node_t *) node;
21257 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21258 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21259 }
21260 break;
21261 }
21262 case PM_OR_NODE: {
21263 pm_or_node_t *cast = (pm_or_node_t *) node;
21264 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21265 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21266 }
21267 break;
21268 }
21269 default:
21270 break;
21271 }
21272
21273 pm_token_t message;
21274
21275 switch (parser->current.type) {
21276 case PM_CASE_OPERATOR:
21277 case PM_CASE_KEYWORD:
21278 case PM_TOKEN_CONSTANT:
21279 case PM_TOKEN_IDENTIFIER:
21280 case PM_TOKEN_METHOD_NAME: {
21281 parser_lex(parser);
21282 message = parser->previous;
21283 break;
21284 }
21285 default: {
21286 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21287 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21288 }
21289 }
21290
21291 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21292 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21293
21294 if (
21295 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21296 arguments.arguments == NULL &&
21297 arguments.opening_loc.start == NULL &&
21298 match1(parser, PM_TOKEN_COMMA)
21299 ) {
21300 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21301 } else {
21302 return UP(call);
21303 }
21304 }
21305 case PM_TOKEN_DOT_DOT:
21306 case PM_TOKEN_DOT_DOT_DOT: {
21307 parser_lex(parser);
21308
21309 pm_node_t *right = NULL;
21310 if (token_begins_expression_p(parser->current.type)) {
21311 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21312 }
21313
21314 return UP(pm_range_node_create(parser, node, &token, right));
21315 }
21316 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21317 pm_token_t keyword = parser->current;
21318 parser_lex(parser);
21319
21320 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21321 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21322 }
21323 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21324 pm_token_t keyword = parser->current;
21325 parser_lex(parser);
21326
21327 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21328 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21329 }
21330 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21331 parser_lex(parser);
21332 pm_statements_node_t *statements = pm_statements_node_create(parser);
21333 pm_statements_node_body_append(parser, statements, node, true);
21334
21335 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21336 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21337 }
21338 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21339 parser_lex(parser);
21340 pm_statements_node_t *statements = pm_statements_node_create(parser);
21341 pm_statements_node_body_append(parser, statements, node, true);
21342
21343 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21344 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21345 }
21346 case PM_TOKEN_QUESTION_MARK: {
21347 context_push(parser, PM_CONTEXT_TERNARY);
21348 pm_node_list_t current_block_exits = { 0 };
21349 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21350
21351 pm_token_t qmark = parser->current;
21352 parser_lex(parser);
21353
21354 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21355
21356 if (parser->recovering) {
21357 // If parsing the true expression of this ternary resulted in a syntax
21358 // error that we can recover from, then we're going to put missing nodes
21359 // and tokens into the remaining places. We want to be sure to do this
21360 // before the `expect` function call to make sure it doesn't
21361 // accidentally move past a ':' token that occurs after the syntax
21362 // error.
21363 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21364 pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
21365
21366 context_pop(parser);
21367 pop_block_exits(parser, previous_block_exits);
21368 pm_node_list_free(&current_block_exits);
21369
21370 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21371 }
21372
21373 accept1(parser, PM_TOKEN_NEWLINE);
21374 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21375
21376 pm_token_t colon = parser->previous;
21377 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21378
21379 context_pop(parser);
21380 pop_block_exits(parser, previous_block_exits);
21381 pm_node_list_free(&current_block_exits);
21382
21383 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21384 }
21385 case PM_TOKEN_COLON_COLON: {
21386 parser_lex(parser);
21387 pm_token_t delimiter = parser->previous;
21388
21389 switch (parser->current.type) {
21390 case PM_TOKEN_CONSTANT: {
21391 parser_lex(parser);
21392 pm_node_t *path;
21393
21394 if (
21395 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21396 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21397 ) {
21398 // If we have a constant immediately following a '::' operator, then
21399 // this can either be a constant path or a method call, depending on
21400 // what follows the constant.
21401 //
21402 // If we have parentheses, then this is a method call. That would
21403 // look like Foo::Bar().
21404 pm_token_t message = parser->previous;
21405 pm_arguments_t arguments = { 0 };
21406
21407 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21408 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21409 } else {
21410 // Otherwise, this is a constant path. That would look like Foo::Bar.
21411 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21412 }
21413
21414 // If this is followed by a comma then it is a multiple assignment.
21415 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21416 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21417 }
21418
21419 return path;
21420 }
21421 case PM_CASE_OPERATOR:
21422 case PM_CASE_KEYWORD:
21423 case PM_TOKEN_IDENTIFIER:
21424 case PM_TOKEN_METHOD_NAME: {
21425 parser_lex(parser);
21426 pm_token_t message = parser->previous;
21427
21428 // If we have an identifier following a '::' operator, then it is for
21429 // sure a method call.
21430 pm_arguments_t arguments = { 0 };
21431 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21432 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21433
21434 // If this is followed by a comma then it is a multiple assignment.
21435 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21436 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21437 }
21438
21439 return UP(call);
21440 }
21441 case PM_TOKEN_PARENTHESIS_LEFT: {
21442 // If we have a parenthesis following a '::' operator, then it is the
21443 // method call shorthand. That would look like Foo::(bar).
21444 pm_arguments_t arguments = { 0 };
21445 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21446
21447 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21448 }
21449 default: {
21450 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21451 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21452 }
21453 }
21454 }
21455 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21456 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21457 parser_lex(parser);
21458 accept1(parser, PM_TOKEN_NEWLINE);
21459
21460 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21461 context_pop(parser);
21462
21463 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21464 }
21465 case PM_TOKEN_BRACKET_LEFT: {
21466 parser_lex(parser);
21467
21468 pm_arguments_t arguments = { 0 };
21469 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21470
21471 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21472 pm_accepts_block_stack_push(parser, true);
21473 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21474 pm_accepts_block_stack_pop(parser);
21475 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21476 }
21477
21478 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21479
21480 // If we have a comma after the closing bracket then this is a multiple
21481 // assignment and we should parse the targets.
21482 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21483 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21484 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21485 }
21486
21487 // If we're at the end of the arguments, we can now check if there is a
21488 // block node that starts with a {. If there is, then we can parse it and
21489 // add it to the arguments.
21490 pm_block_node_t *block = NULL;
21491 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21492 block = parse_block(parser, (uint16_t) (depth + 1));
21493 pm_arguments_validate_block(parser, &arguments, block);
21494 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21495 block = parse_block(parser, (uint16_t) (depth + 1));
21496 }
21497
21498 if (block != NULL) {
21499 if (arguments.block != NULL) {
21500 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21501 if (arguments.arguments == NULL) {
21502 arguments.arguments = pm_arguments_node_create(parser);
21503 }
21504 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21505 }
21506
21507 arguments.block = UP(block);
21508 }
21509
21510 return UP(pm_call_node_aref_create(parser, node, &arguments));
21511 }
21512 case PM_TOKEN_KEYWORD_IN: {
21513 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21514 parser->pattern_matching_newlines = true;
21515
21516 pm_token_t operator = parser->current;
21517 parser->command_start = false;
21518 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21519 parser_lex(parser);
21520
21521 pm_constant_id_list_t captures = { 0 };
21522 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21523
21524 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21525 pm_constant_id_list_free(&captures);
21526
21527 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21528 }
21529 case PM_TOKEN_EQUAL_GREATER: {
21530 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21531 parser->pattern_matching_newlines = true;
21532
21533 pm_token_t operator = parser->current;
21534 parser->command_start = false;
21535 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21536 parser_lex(parser);
21537
21538 pm_constant_id_list_t captures = { 0 };
21539 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21540
21541 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21542 pm_constant_id_list_free(&captures);
21543
21544 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21545 }
21546 default:
21547 assert(false && "unreachable");
21548 return NULL;
21549 }
21550}
21551
21552#undef PM_PARSE_PATTERN_SINGLE
21553#undef PM_PARSE_PATTERN_TOP
21554#undef PM_PARSE_PATTERN_MULTI
21555
21560static inline bool
21561pm_call_node_command_p(const pm_call_node_t *node) {
21562 return (
21563 (node->opening_loc.start == NULL) &&
21564 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21565 (node->arguments != NULL || node->block != NULL)
21566 );
21567}
21568
21577static pm_node_t *
21578parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21579 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21580 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21581 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
21582 }
21583
21584 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21585
21586 switch (PM_NODE_TYPE(node)) {
21587 case PM_MISSING_NODE:
21588 // If we found a syntax error, then the type of node returned by
21589 // parse_expression_prefix is going to be a missing node.
21590 return node;
21591 case PM_PRE_EXECUTION_NODE:
21592 case PM_POST_EXECUTION_NODE:
21593 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21594 case PM_ALIAS_METHOD_NODE:
21595 case PM_MULTI_WRITE_NODE:
21596 case PM_UNDEF_NODE:
21597 // These expressions are statements, and cannot be followed by
21598 // operators (except modifiers).
21599 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21600 return node;
21601 }
21602 break;
21603 case PM_CALL_NODE:
21604 // If we have a call node, then we need to check if it looks like a
21605 // method call without parentheses that contains arguments. If it
21606 // does, then it has different rules for parsing infix operators,
21607 // namely that it only accepts composition (and/or) and modifiers
21608 // (if/unless/etc.).
21609 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21610 return node;
21611 }
21612 break;
21613 case PM_SYMBOL_NODE:
21614 // If we have a symbol node that is being parsed as a label, then we
21615 // need to immediately return, because there should never be an
21616 // infix operator following this node.
21617 if (pm_symbol_node_label_p(node)) {
21618 return node;
21619 }
21620 break;
21621 default:
21622 break;
21623 }
21624
21625 // Otherwise we'll look and see if the next token can be parsed as an infix
21626 // operator. If it can, then we'll parse it using parse_expression_infix.
21627 pm_binding_powers_t current_binding_powers;
21628 pm_token_type_t current_token_type;
21629
21630 while (
21631 current_token_type = parser->current.type,
21632 current_binding_powers = pm_binding_powers[current_token_type],
21633 binding_power <= current_binding_powers.left &&
21634 current_binding_powers.binary
21635 ) {
21636 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21637
21638 if (context_terminator(parser->current_context->context, &parser->current)) {
21639 // If this token terminates the current context, then we need to
21640 // stop parsing the expression, as it has become a statement.
21641 return node;
21642 }
21643
21644 switch (PM_NODE_TYPE(node)) {
21645 case PM_MULTI_WRITE_NODE:
21646 // Multi-write nodes are statements, and cannot be followed by
21647 // operators except modifiers.
21648 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21649 return node;
21650 }
21651 break;
21652 case PM_CLASS_VARIABLE_WRITE_NODE:
21653 case PM_CONSTANT_PATH_WRITE_NODE:
21654 case PM_CONSTANT_WRITE_NODE:
21655 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21656 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21657 case PM_LOCAL_VARIABLE_WRITE_NODE:
21658 // These expressions are statements, by virtue of the right-hand
21659 // side of their write being an implicit array.
21660 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21661 return node;
21662 }
21663 break;
21664 case PM_CALL_NODE:
21665 // These expressions are also statements, by virtue of the
21666 // right-hand side of the expression (i.e., the last argument to
21667 // the call node) being an implicit array.
21668 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21669 return node;
21670 }
21671 break;
21672 default:
21673 break;
21674 }
21675
21676 // If the operator is nonassoc and we should not be able to parse the
21677 // upcoming infix operator, break.
21678 if (current_binding_powers.nonassoc) {
21679 // If this is a non-assoc operator and we are about to parse the
21680 // exact same operator, then we need to add an error.
21681 if (match1(parser, current_token_type)) {
21682 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21683 break;
21684 }
21685
21686 // If this is an endless range, then we need to reject a couple of
21687 // additional operators because it violates the normal operator
21688 // precedence rules. Those patterns are:
21689 //
21690 // 1.. & 2
21691 // 1.. * 2
21692 //
21693 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21694 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21695 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21696 break;
21697 }
21698
21699 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21700 break;
21701 }
21702 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21703 break;
21704 }
21705 }
21706
21707 if (accepts_command_call) {
21708 // A command-style method call is only accepted on method chains.
21709 // Thus, we check whether the parsed node can continue method chains.
21710 // The method chain can continue if the parsed node is one of the following five kinds:
21711 // (1) index access: foo[1]
21712 // (2) attribute access: foo.bar
21713 // (3) method call with parenthesis: foo.bar(1)
21714 // (4) method call with a block: foo.bar do end
21715 // (5) constant path: foo::Bar
21716 switch (node->type) {
21717 case PM_CALL_NODE: {
21718 pm_call_node_t *cast = (pm_call_node_t *)node;
21719 if (
21720 // (1) foo[1]
21721 !(
21722 cast->call_operator_loc.start == NULL &&
21723 cast->message_loc.start != NULL &&
21724 cast->message_loc.start[0] == '[' &&
21725 cast->message_loc.end[-1] == ']'
21726 ) &&
21727 // (2) foo.bar
21728 !(
21729 cast->call_operator_loc.start != NULL &&
21730 cast->arguments == NULL &&
21731 cast->block == NULL &&
21732 cast->opening_loc.start == NULL
21733 ) &&
21734 // (3) foo.bar(1)
21735 !(
21736 cast->call_operator_loc.start != NULL &&
21737 cast->opening_loc.start != NULL
21738 ) &&
21739 // (4) foo.bar do end
21740 !(
21741 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21742 )
21743 ) {
21744 accepts_command_call = false;
21745 }
21746 break;
21747 }
21748 // (5) foo::Bar
21749 case PM_CONSTANT_PATH_NODE:
21750 break;
21751 default:
21752 accepts_command_call = false;
21753 break;
21754 }
21755 }
21756 }
21757
21758 return node;
21759}
21760
21765static pm_statements_node_t *
21766wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21767 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21768 if (statements == NULL) {
21769 statements = pm_statements_node_create(parser);
21770 }
21771
21772 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21773 pm_arguments_node_arguments_append(
21774 arguments,
21775 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21776 );
21777
21778 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21779 parser,
21780 arguments,
21781 pm_parser_constant_id_constant(parser, "print", 5)
21782 )), true);
21783 }
21784
21785 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21786 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21787 if (statements == NULL) {
21788 statements = pm_statements_node_create(parser);
21789 }
21790
21791 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21792 pm_arguments_node_arguments_append(
21793 arguments,
21794 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21795 );
21796
21797 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21798 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21799
21800 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21801 parser,
21802 pm_parser_constant_id_constant(parser, "$F", 2),
21803 UP(call)
21804 );
21805
21806 pm_statements_node_body_prepend(statements, UP(write));
21807 }
21808
21809 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21810 pm_arguments_node_arguments_append(
21811 arguments,
21812 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21813 );
21814
21815 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21816 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21817 pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
21818 parser,
21819 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21820 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21821 UP(pm_true_node_synthesized_create(parser))
21822 )));
21823
21824 pm_arguments_node_arguments_append(arguments, UP(keywords));
21825 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21826 }
21827
21828 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21829 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21830 parser,
21831 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21832 statements
21833 )), true);
21834
21835 statements = wrapped_statements;
21836 }
21837
21838 return statements;
21839}
21840
21844static pm_node_t *
21845parse_program(pm_parser_t *parser) {
21846 // If the current scope is NULL, then we want to push a new top level scope.
21847 // The current scope could exist in the event that we are parsing an eval
21848 // and the user has passed into scopes that already exist.
21849 if (parser->current_scope == NULL) {
21850 pm_parser_scope_push(parser, true);
21851 }
21852
21853 pm_node_list_t current_block_exits = { 0 };
21854 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21855
21856 parser_lex(parser);
21857 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21858
21859 if (statements != NULL && !parser->parsing_eval) {
21860 // If we have statements, then the top-level statement should be
21861 // explicitly checked as well. We have to do this here because
21862 // everywhere else we check all but the last statement.
21863 assert(statements->body.size > 0);
21864 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21865 }
21866
21867 pm_constant_id_list_t locals;
21868 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21869 pm_parser_scope_pop(parser);
21870
21871 // At the top level, see if we need to wrap the statements in a program
21872 // node with a while loop based on the options.
21874 statements = wrap_statements(parser, statements);
21875 } else {
21876 flush_block_exits(parser, previous_block_exits);
21877 }
21878
21879 pm_node_list_free(&current_block_exits);
21880
21881 // If this is an empty file, then we're still going to parse all of the
21882 // statements in order to gather up all of the comments and such. Here we'll
21883 // correct the location information.
21884 if (statements == NULL) {
21885 statements = pm_statements_node_create(parser);
21886 pm_statements_node_location_set(statements, parser->start, parser->start);
21887 }
21888
21889 return UP(pm_program_node_create(parser, &locals, statements));
21890}
21891
21892/******************************************************************************/
21893/* External functions */
21894/******************************************************************************/
21895
/**
 * Locate the first occurrence of the NUL-terminated string little within the
 * first big_length bytes of big. Only little has to be NUL-terminated; big is
 * bounded by big_length.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to look for.
 * @param big_length The number of bytes of big that may be examined.
 * @return A pointer to the start of the first match, or NULL if there is no
 *     match (which is always the case when little is longer than big_length).
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // Without this guard the subtraction below would wrap around and form a
    // pointer outside of the buffer, which is undefined behavior.
    if (little_length > big_length) return NULL;

    for (const char *max = big + big_length - little_length; big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21915
21916#ifdef _WIN32
21917#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21918#else
21924static void
21925pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21926 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21927 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
21928 }
21929}
21930#endif
21931
21936static void
21937pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21938 const char *switches = pm_strnstr(engine, " -", length);
21939 if (switches == NULL) return;
21940
21941 pm_options_t next_options = *options;
21942 options->shebang_callback(
21943 &next_options,
21944 (const uint8_t *) (switches + 1),
21945 length - ((size_t) (switches - engine)) - 1,
21946 options->shebang_callback_data
21947 );
21948
21949 size_t encoding_length;
21950 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21951 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
21952 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21953 }
21954
21955 parser->command_line = next_options.command_line;
21956 parser->frozen_string_literal = next_options.frozen_string_literal;
21957}
21958
21963pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
21964 assert(source != NULL);
21965
21966 *parser = (pm_parser_t) {
21967 .node_id = 0,
21968 .lex_state = PM_LEX_STATE_BEG,
21969 .enclosure_nesting = 0,
21970 .lambda_enclosure_nesting = -1,
21971 .brace_nesting = 0,
21972 .do_loop_stack = 0,
21973 .accepts_block_stack = 0,
21974 .lex_modes = {
21975 .index = 0,
21976 .stack = {{ .mode = PM_LEX_DEFAULT }},
21977 .current = &parser->lex_modes.stack[0],
21978 },
21979 .start = source,
21980 .end = source + size,
21981 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21982 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21983 .next_start = NULL,
21984 .heredoc_end = NULL,
21985 .data_loc = { .start = NULL, .end = NULL },
21986 .comment_list = { 0 },
21987 .magic_comment_list = { 0 },
21988 .warning_list = { 0 },
21989 .error_list = { 0 },
21990 .current_scope = NULL,
21991 .current_context = NULL,
21992 .encoding = PM_ENCODING_UTF_8_ENTRY,
21993 .encoding_changed_callback = NULL,
21994 .encoding_comment_start = source,
21995 .lex_callback = NULL,
21996 .filepath = { 0 },
21997 .constant_pool = { 0 },
21998 .newline_list = { 0 },
21999 .integer_base = 0,
22000 .current_string = PM_STRING_EMPTY,
22001 .start_line = 1,
22002 .explicit_encoding = NULL,
22003 .command_line = 0,
22004 .parsing_eval = false,
22005 .partial_script = false,
22006 .command_start = true,
22007 .recovering = false,
22008 .encoding_locked = false,
22009 .encoding_changed = false,
22010 .pattern_matching_newlines = false,
22011 .in_keyword_arg = false,
22012 .current_block_exits = NULL,
22013 .semantic_token_seen = false,
22014 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22015 .current_regular_expression_ascii_only = false,
22016 .warn_mismatched_indentation = true
22017 };
22018
22019 // Initialize the constant pool. We're going to completely guess as to the
22020 // number of constants that we'll need based on the size of the input. The
22021 // ratio we chose here is actually less arbitrary than you might think.
22022 //
22023 // We took ~50K Ruby files and measured the size of the file versus the
22024 // number of constants that were found in those files. Then we found the
22025 // average and standard deviation of the ratios of constants/bytesize. Then
22026 // we added 1.34 standard deviations to the average to get a ratio that
22027 // would fit 75% of the files (for a two-tailed distribution). This works
22028 // because there was about a 0.77 correlation and the distribution was
22029 // roughly normal.
22030 //
22031 // This ratio will need to change if we add more constants to the constant
22032 // pool for another node type.
22033 uint32_t constant_size = ((uint32_t) size) / 95;
22034 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22035
22036 // Initialize the newline list. Similar to the constant pool, we're going to
22037 // guess at the number of newlines that we'll need based on the size of the
22038 // input.
22039 size_t newline_size = size / 22;
22040 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22041
22042 // If options were provided to this parse, establish them here.
22043 if (options != NULL) {
22044 // filepath option
22045 parser->filepath = options->filepath;
22046
22047 // line option
22048 parser->start_line = options->line;
22049
22050 // encoding option
22051 size_t encoding_length = pm_string_length(&options->encoding);
22052 if (encoding_length > 0) {
22053 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22054 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22055 }
22056
22057 // encoding_locked option
22058 parser->encoding_locked = options->encoding_locked;
22059
22060 // frozen_string_literal option
22062
22063 // command_line option
22064 parser->command_line = options->command_line;
22065
22066 // version option
22067 parser->version = options->version;
22068
22069 // partial_script
22070 parser->partial_script = options->partial_script;
22071
22072 // scopes option
22073 parser->parsing_eval = options->scopes_count > 0;
22074 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22075
22076 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22077 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22078 pm_parser_scope_push(parser, scope_index == 0);
22079
22080 // Scopes given from the outside are not allowed to have numbered
22081 // parameters.
22082 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22083
22084 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22085 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22086
22087 const uint8_t *source = pm_string_source(local);
22088 size_t length = pm_string_length(local);
22089
22090 void *allocated = xmalloc(length);
22091 if (allocated == NULL) continue;
22092
22093 memcpy(allocated, source, length);
22094 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22095 }
22096 }
22097 }
22098
22099 // Now that we have established the user-provided options, check if
22100 // a version was given and parse as the latest version otherwise.
22101 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22103 }
22104
22105 pm_accepts_block_stack_push(parser, true);
22106
22107 // Skip past the UTF-8 BOM if it exists.
22108 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22109 parser->current.end += 3;
22110 parser->encoding_comment_start += 3;
22111
22112 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22114 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22115 }
22116 }
22117
22118 // If the -x command line flag is set, or the first shebang of the file does
22119 // not include "ruby", then we'll search for a shebang that does include
22120 // "ruby" and start parsing from there.
22121 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22122
22123 // If the first two bytes of the source are a shebang, then we will do a bit
22124 // of extra processing.
22125 //
22126 // First, we'll indicate that the encoding comment is at the end of the
22127 // shebang. This means that when a shebang is present the encoding comment
22128 // can begin on the second line.
22129 //
22130 // Second, we will check if the shebang includes "ruby". If it does, then we
22131 // we will start parsing from there. We will also potentially warning the
22132 // user if there is a carriage return at the end of the shebang. We will
22133 // also potentially call the shebang callback if this is the main script to
22134 // allow the caller to parse the shebang and find any command-line options.
22135 // If the shebang does not include "ruby" and this is the main script being
22136 // parsed, then we will start searching the file for a shebang that does
22137 // contain "ruby" as if -x were passed on the command line.
22138 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22139 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22140
22141 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22142 const char *engine;
22143
22144 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22145 if (newline != NULL) {
22146 parser->encoding_comment_start = newline + 1;
22147
22148 if (options == NULL || options->main_script) {
22149 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22150 }
22151 }
22152
22153 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22154 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22155 }
22156
22157 search_shebang = false;
22158 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22159 search_shebang = true;
22160 }
22161 }
22162
22163 // Here we're going to find the first shebang that includes "ruby" and start
22164 // parsing from there.
22165 if (search_shebang) {
22166 // If a shebang that includes "ruby" is not found, then we're going to a
22167 // a load error to the list of errors on the parser.
22168 bool found_shebang = false;
22169
22170 // This is going to point to the start of each line as we check it.
22171 // We'll maintain a moving window looking at each line at they come.
22172 const uint8_t *cursor = parser->start;
22173
22174 // The newline pointer points to the end of the current line that we're
22175 // considering. If it is NULL, then we're at the end of the file.
22176 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22177
22178 while (newline != NULL) {
22179 pm_newline_list_append(&parser->newline_list, newline);
22180
22181 cursor = newline + 1;
22182 newline = next_newline(cursor, parser->end - cursor);
22183
22184 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22185 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22186 const char *engine;
22187 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22188 found_shebang = true;
22189
22190 if (newline != NULL) {
22191 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22192 parser->encoding_comment_start = newline + 1;
22193 }
22194
22195 if (options != NULL && options->shebang_callback != NULL) {
22196 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22197 }
22198
22199 break;
22200 }
22201 }
22202 }
22203
22204 if (found_shebang) {
22205 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22206 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22207 } else {
22208 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22209 pm_newline_list_clear(&parser->newline_list);
22210 }
22211 }
22212
22213 // The encoding comment can start after any amount of inline whitespace, so
22214 // here we'll advance it to the first non-inline-whitespace character so
22215 // that it is ready for future comparisons.
22216 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22217}
22218
22227
22231static inline void
22232pm_comment_list_free(pm_list_t *list) {
22233 pm_list_node_t *node, *next;
22234
22235 for (node = list->head; node != NULL; node = next) {
22236 next = node->next;
22237
22238 pm_comment_t *comment = (pm_comment_t *) node;
22239 xfree(comment);
22240 }
22241}
22242
22246static inline void
22247pm_magic_comment_list_free(pm_list_t *list) {
22248 pm_list_node_t *node, *next;
22249
22250 for (node = list->head; node != NULL; node = next) {
22251 next = node->next;
22252
22255 }
22256}
22257
22263 pm_string_free(&parser->filepath);
22264 pm_diagnostic_list_free(&parser->error_list);
22265 pm_diagnostic_list_free(&parser->warning_list);
22266 pm_comment_list_free(&parser->comment_list);
22267 pm_magic_comment_list_free(&parser->magic_comment_list);
22268 pm_constant_pool_free(&parser->constant_pool);
22269 pm_newline_list_free(&parser->newline_list);
22270
22271 while (parser->current_scope != NULL) {
22272 // Normally, popping the scope doesn't free the locals since it is
22273 // assumed that ownership has transferred to the AST. However if we have
22274 // scopes while we're freeing the parser, it's likely they came from
22275 // eval scopes and we need to free them explicitly here.
22276 pm_parser_scope_pop(parser);
22277 }
22278
22279 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22280 lex_mode_pop(parser);
22281 }
22282}
22283
22289 return parse_program(parser);
22290}
22291
22297static bool
22298pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22299#define LINE_SIZE 4096
22300 char line[LINE_SIZE];
22301
22302 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22303 size_t length = LINE_SIZE;
22304 while (length > 0 && line[length - 1] == '\n') length--;
22305
22306 if (length == LINE_SIZE) {
22307 // If we read a line that is the maximum size and it doesn't end
22308 // with a newline, then we'll just append it to the buffer and
22309 // continue reading.
22310 length--;
22311 pm_buffer_append_string(buffer, line, length);
22312 continue;
22313 }
22314
22315 // Append the line to the buffer.
22316 length--;
22317 pm_buffer_append_string(buffer, line, length);
22318
22319 // Check if the line matches the __END__ marker. If it does, then stop
22320 // reading and return false. In most circumstances, this means we should
22321 // stop reading from the stream so that the DATA constant can pick it
22322 // up.
22323 switch (length) {
22324 case 7:
22325 if (strncmp(line, "__END__", 7) == 0) return false;
22326 break;
22327 case 8:
22328 if (strncmp(line, "__END__\n", 8) == 0) return false;
22329 break;
22330 case 9:
22331 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22332 break;
22333 }
22334
22335 // All data should be read via gets. If the string returned by gets
22336 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22337 if (stream_feof(stream)) {
22338 break;
22339 }
22340 }
22341
22342 return true;
22343#undef LINE_SIZE
22344}
22345
22355static bool
22356pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22357 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22358
22359 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22360 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22361 return true;
22362 }
22363 }
22364
22365 return false;
22366}
22367
22375pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22376 pm_buffer_init(buffer);
22377
22378 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22379
22380 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22381 pm_node_t *node = pm_parse(parser);
22382
22383 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22384 pm_node_destroy(parser, node);
22385 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22386
22387 pm_parser_free(parser);
22388 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22389 node = pm_parse(parser);
22390 }
22391
22392 return node;
22393}
22394
22399pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22400 pm_options_t options = { 0 };
22401 pm_options_read(&options, data);
22402
22403 pm_parser_t parser;
22404 pm_parser_init(&parser, source, size, &options);
22405
22406 pm_node_t *node = pm_parse(&parser);
22407 pm_node_destroy(&parser, node);
22408
22409 bool result = parser.error_list.size == 0;
22410 pm_parser_free(&parser);
22411 pm_options_free(&options);
22412
22413 return result;
22414}
22415
22416#undef PM_CASE_KEYWORD
22417#undef PM_CASE_OPERATOR
22418#undef PM_CASE_WRITABLE
22419#undef PM_STRING_EMPTY
22420
22421// We optionally support serializing to a binary string. For systems that don't
22422// want or need this functionality, it can be turned off with the
22423// PRISM_EXCLUDE_SERIALIZATION define.
22424#ifndef PRISM_EXCLUDE_SERIALIZATION
22425
22426static inline void
22427pm_serialize_header(pm_buffer_t *buffer) {
22428 pm_buffer_append_string(buffer, "PRISM", 5);
22429 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22430 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22431 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22432 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22433}
22434
22439pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22440 pm_serialize_header(buffer);
22441 pm_serialize_content(parser, node, buffer);
22442 pm_buffer_append_byte(buffer, '\0');
22443}
22444
22450pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22451 pm_options_t options = { 0 };
22452 pm_options_read(&options, data);
22453
22454 pm_parser_t parser;
22455 pm_parser_init(&parser, source, size, &options);
22456
22457 pm_node_t *node = pm_parse(&parser);
22458
22459 pm_serialize_header(buffer);
22460 pm_serialize_content(&parser, node, buffer);
22461 pm_buffer_append_byte(buffer, '\0');
22462
22463 pm_node_destroy(&parser, node);
22464 pm_parser_free(&parser);
22465 pm_options_free(&options);
22466}
22467
22473pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22474 pm_parser_t parser;
22475 pm_options_t options = { 0 };
22476 pm_options_read(&options, data);
22477
22478 pm_buffer_t parser_buffer;
22479 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22480 pm_serialize_header(buffer);
22481 pm_serialize_content(&parser, node, buffer);
22482 pm_buffer_append_byte(buffer, '\0');
22483
22484 pm_node_destroy(&parser, node);
22485 pm_buffer_free(&parser_buffer);
22486 pm_parser_free(&parser);
22487 pm_options_free(&options);
22488}
22489
22494pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22495 pm_options_t options = { 0 };
22496 pm_options_read(&options, data);
22497
22498 pm_parser_t parser;
22499 pm_parser_init(&parser, source, size, &options);
22500
22501 pm_node_t *node = pm_parse(&parser);
22502 pm_serialize_header(buffer);
22503 pm_serialize_encoding(parser.encoding, buffer);
22504 pm_buffer_append_varsint(buffer, parser.start_line);
22505 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22506
22507 pm_node_destroy(&parser, node);
22508 pm_parser_free(&parser);
22509 pm_options_free(&options);
22510}
22511
22512#endif
22513
22514/******************************************************************************/
22515/* Slice queries for the Ruby API */
22516/******************************************************************************/
22517
/** The category of a slice of source, as determined by pm_slice_type. */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE,

    /** Returned when the slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL,

    /** Returned when the slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT,

    /** Returned when the slice is an identifier followed by a single !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME
} pm_slice_type_t;
22535
22539pm_slice_type_t
22540pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22541 // first, get the right encoding object
22542 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22543 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22544
22545 // check that there is at least one character
22546 if (length == 0) return PM_SLICE_TYPE_NONE;
22547
22548 size_t width;
22549 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22550 // valid because alphabetical
22551 } else if (*source == '_') {
22552 // valid because underscore
22553 width = 1;
22554 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22555 // valid because multibyte
22556 } else {
22557 // invalid because no match
22558 return PM_SLICE_TYPE_NONE;
22559 }
22560
22561 // determine the type of the slice based on the first character
22562 const uint8_t *end = source + length;
22563 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22564
22565 // next, iterate through all of the bytes of the string to ensure that they
22566 // are all valid identifier characters
22567 source += width;
22568
22569 while (source < end) {
22570 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22571 // valid because alphanumeric
22572 source += width;
22573 } else if (*source == '_') {
22574 // valid because underscore
22575 source++;
22576 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22577 // valid because multibyte
22578 source += width;
22579 } else {
22580 // invalid because no match
22581 break;
22582 }
22583 }
22584
22585 // accept a ! or ? at the end of the slice as a method name
22586 if (*source == '!' || *source == '?' || *source == '=') {
22587 source++;
22588 result = PM_SLICE_TYPE_METHOD_NAME;
22589 }
22590
22591 // valid if we are at the end of the slice
22592 return source == end ? result : PM_SLICE_TYPE_NONE;
22593}
22594
22599pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22600 switch (pm_slice_type(source, length, encoding_name)) {
22601 case PM_SLICE_TYPE_ERROR:
22602 return PM_STRING_QUERY_ERROR;
22603 case PM_SLICE_TYPE_NONE:
22604 case PM_SLICE_TYPE_CONSTANT:
22605 case PM_SLICE_TYPE_METHOD_NAME:
22606 return PM_STRING_QUERY_FALSE;
22607 case PM_SLICE_TYPE_LOCAL:
22608 return PM_STRING_QUERY_TRUE;
22609 }
22610
22611 assert(false && "unreachable");
22612 return PM_STRING_QUERY_FALSE;
22613}
22614
22619pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22620 switch (pm_slice_type(source, length, encoding_name)) {
22621 case PM_SLICE_TYPE_ERROR:
22622 return PM_STRING_QUERY_ERROR;
22623 case PM_SLICE_TYPE_NONE:
22624 case PM_SLICE_TYPE_LOCAL:
22625 case PM_SLICE_TYPE_METHOD_NAME:
22626 return PM_STRING_QUERY_FALSE;
22627 case PM_SLICE_TYPE_CONSTANT:
22628 return PM_STRING_QUERY_TRUE;
22629 }
22630
22631 assert(false && "unreachable");
22632 return PM_STRING_QUERY_FALSE;
22633}
22634
22639pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22640#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22641#define C1(c) (*source == c)
22642#define C2(s) (memcmp(source, s, 2) == 0)
22643#define C3(s) (memcmp(source, s, 3) == 0)
22644
22645 switch (pm_slice_type(source, length, encoding_name)) {
22646 case PM_SLICE_TYPE_ERROR:
22647 return PM_STRING_QUERY_ERROR;
22648 case PM_SLICE_TYPE_NONE:
22649 break;
22650 case PM_SLICE_TYPE_LOCAL:
22651 // numbered parameters are not valid method names
22652 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22653 case PM_SLICE_TYPE_CONSTANT:
22654 // all constants are valid method names
22655 case PM_SLICE_TYPE_METHOD_NAME:
22656 // all method names are valid method names
22657 return PM_STRING_QUERY_TRUE;
22658 }
22659
22660 switch (length) {
22661 case 1:
22662 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22663 case 2:
22664 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22665 case 3:
22666 return B(C3("===") || C3("<=>") || C3("[]="));
22667 default:
22668 return PM_STRING_QUERY_FALSE;
22669 }
22670
22671#undef B
22672#undef C1
22673#undef C2
22674#undef C3
22675}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:218
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:202
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:182
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:225
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:231
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:98
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:499
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:324
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:351
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:336
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:348
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:360
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:306
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:387
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:339
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:312
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:381
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:417
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:432
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:405
@ PM_CONTEXT_IF
an if statement
Definition parser.h:363
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:399
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:378
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:366
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:402
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:303
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:426
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:411
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:342
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:435
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:357
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:408
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:345
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:315
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:384
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:369
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:414
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:354
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:429
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:396
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:438
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:569
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:525
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:451
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:351
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:359
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:367
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22288
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22224
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22262
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22375
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:21963
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17364
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17366
const uint8_t * start
The start of the regular expression.
Definition prism.c:17369
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17380
const uint8_t * end
The end of the regular expression.
Definition prism.c:17372
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20356
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20367
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20358
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20364
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20361
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20374
AndNode.
Definition ast.h:1272
struct pm_node * left
AndNode::left.
Definition ast.h:1288
struct pm_node * right
AndNode::right.
Definition ast.h:1301
ArgumentsNode.
Definition ast.h:1333
pm_node_t base
The embedded base node.
Definition ast.h:1335
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1346
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1586
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1597
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1600
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1588
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1591
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1594
ArrayNode.
Definition ast.h:1364
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1374
ArrayPatternNode.
Definition ast.h:1425
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1444
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1484
pm_node_t base
The embedded base node.
Definition ast.h:1427
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1494
AssocNode.
Definition ast.h:1509
struct pm_node * value
AssocNode::value.
Definition ast.h:1541
struct pm_node * key
AssocNode::key.
Definition ast.h:1528
BeginNode.
Definition ast.h:1635
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1688
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1668
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1658
pm_node_t base
The embedded base node.
Definition ast.h:1637
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1678
This struct represents a set of binding powers used for a given token.
Definition prism.c:12163
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12171
pm_binding_power_t left
The left binding power.
Definition prism.c:12165
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12177
pm_binding_power_t right
The right binding power.
Definition prism.c:12168
BlockLocalVariableNode.
Definition ast.h:1754
BlockNode.
Definition ast.h:1782
BlockParameterNode.
Definition ast.h:1858
BlockParametersNode.
Definition ast.h:1912
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2139
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2200
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2220
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2158
pm_constant_id_t name
CallNode::name.
Definition ast.h:2181
pm_node_t base
The embedded base node.
Definition ast.h:2141
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2233
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2171
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2191
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2210
struct pm_node * block
CallNode::block.
Definition ast.h:2243
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2601
CaseNode.
Definition ast.h:2648
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2671
ClassVariableReadNode.
Definition ast.h:2943
ClassVariableTargetNode.
Definition ast.h:2972
ClassVariableWriteNode.
Definition ast.h:2995
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:461
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:469
pm_location_t location
The location of the comment in the source.
Definition parser.h:466
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3209
ConstantPathTargetNode.
Definition ast.h:3347
ConstantReadNode.
Definition ast.h:3442
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3471
ConstantWriteNode.
Definition ast.h:3494
This is a node in a linked list of contexts.
Definition parser.h:442
pm_context_t context
The context that this node represents.
Definition parser.h:444
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:447
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:366
ElseNode.
Definition ast.h:3673
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3686
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3771
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3784
FindPatternNode.
Definition ast.h:3831
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3844
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3896
pm_node_t base
The embedded base node.
Definition ast.h:3833
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3909
FlipFlopNode.
Definition ast.h:3927
FloatNode.
Definition ast.h:3960
double value
FloatNode::value.
Definition ast.h:3970
pm_node_t base
The embedded base node.
Definition ast.h:3962
ForwardingParameterNode.
Definition ast.h:4096
GlobalVariableReadNode.
Definition ast.h:4263
GlobalVariableTargetNode.
Definition ast.h:4292
GlobalVariableWriteNode.
Definition ast.h:4315
HashNode.
Definition ast.h:4377
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4403
HashPatternNode.
Definition ast.h:4437
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4492
pm_node_t base
The embedded base node.
Definition ast.h:4439
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4505
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4453
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4526
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4586
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4605
ImaginaryNode.
Definition ast.h:4632
InstanceVariableReadNode.
Definition ast.h:5122
InstanceVariableTargetNode.
Definition ast.h:5151
InstanceVariableWriteNode.
Definition ast.h:5174
IntegerNode.
Definition ast.h:5242
pm_integer_t value
IntegerNode::value.
Definition ast.h:5252
pm_node_t base
The embedded base node.
Definition ast.h:5244
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5280
InterpolatedRegularExpressionNode.
Definition ast.h:5326
InterpolatedStringNode.
Definition ast.h:5363
pm_node_t base
The embedded base node.
Definition ast.h:5365
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5371
InterpolatedSymbolNode.
Definition ast.h:5396
pm_node_t base
The embedded base node.
Definition ast.h:5398
InterpolatedXStringNode.
Definition ast.h:5429
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5437
pm_node_t base
The embedded base node.
Definition ast.h:5431
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5442
KeywordHashNode.
Definition ast.h:5501
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:521
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:515
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:535
pm_constant_id_t name
The name of the local variable.
Definition parser.h:537
pm_location_t location
The location of the local variable in the source.
Definition parser.h:540
uint32_t hash
The hash of the local variable.
Definition parser.h:549
uint32_t index
The index of the local variable in the local table.
Definition parser.h:543
uint32_t reads
The number of times the local variable is read.
Definition parser.h:546
LocalVariableReadNode.
Definition ast.h:5743
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5774
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5761
LocalVariableTargetNode.
Definition ast.h:5792
LocalVariableWriteNode.
Definition ast.h:5820
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5847
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5834
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:557
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:565
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:562
uint32_t size
The number of local variables in the set.
Definition parser.h:559
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:478
MatchLastLineNode.
Definition ast.h:5912
MatchWriteNode.
Definition ast.h:6070
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6083
MissingNode.
Definition ast.h:6095
MultiTargetNode.
Definition ast.h:6166
pm_node_t base
The embedded base node.
Definition ast.h:6168
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6224
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6184
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6234
MultiWriteNode.
Definition ast.h:6249
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1052
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1057
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1075
OptionalParameterNode.
Definition ast.h:6522
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:110
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:159
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:121
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:175
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:182
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:136
int32_t line
The line within the file that the parse starts on.
Definition options.h:130
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:115
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:168
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:192
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:141
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:124
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:156
OrNode.
Definition ast.h:6560
struct pm_node * left
OrNode::left.
Definition ast.h:6576
struct pm_node * right
OrNode::right.
Definition ast.h:6589
ParametersNode.
Definition ast.h:6615
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6633
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6653
pm_node_t base
The embedded base node.
Definition ast.h:6617
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6648
ParenthesesNode.
Definition ast.h:6671
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6679
This struct represents the overall parser.
Definition parser.h:643
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:843
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:652
uint8_t command_line
The command line flags given from the options.
Definition parser.h:862
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:758
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:885
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:912
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:697
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:891
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:800
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:933
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:789
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:915
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:710
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:752
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:724
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:661
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:777
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:859
pm_token_t previous
The previous token we were considering.
Definition parser.h:700
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:806
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:878
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:927
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:906
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:731
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:743
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:694
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:655
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:737
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:872
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:856
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:771
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:687
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:734
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:718
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:667
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:765
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:812
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:899
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:684
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:721
size_t index
The current index into the lexer mode stack.
Definition parser.h:690
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:783
pm_scope_t * current_scope
The current local scope.
Definition parser.h:740
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:888
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:792
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:921
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:649
RangeNode.
Definition ast.h:6907
struct pm_node * right
RangeNode::right.
Definition ast.h:6937
struct pm_node * left
RangeNode::left.
Definition ast.h:6923
RationalNode.
Definition ast.h:6965
pm_node_t base
The embedded base node.
Definition ast.h:6967
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6977
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9487
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9492
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9489
RegularExpressionNode.
Definition ast.h:7032
pm_node_t base
The embedded base node.
Definition ast.h:7034
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7055
RequiredParameterNode.
Definition ast.h:7106
RescueModifierNode.
Definition ast.h:7129
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7147
RescueNode.
Definition ast.h:7167
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7205
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7195
pm_node_t base
The embedded base node.
Definition ast.h:7169
This struct represents a node in a linked list of scopes.
Definition parser.h:583
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:585
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:596
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:623
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:588
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:617
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:629
SplatNode.
Definition ast.h:7467
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7480
StatementsNode.
Definition ast.h:7495
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7503
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7530
pm_node_t base
The embedded base node.
Definition ast.h:7532
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7553
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7548
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7538
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@105 type
The type of the string.
SymbolNode.
Definition ast.h:7626
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7639
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7649
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9461
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9466
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9472
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7682
UnlessNode.
Definition ast.h:7713
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7763
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7773
WhenNode.
Definition ast.h:7849
XStringNode.
Definition ast.h:7940