Ruby 4.1.0dev (2025-12-29 revision cb01b9023ec2007c03bddc992416c33f2c59a0e1)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * The whitespace width associated with a tab character.
 * NOTE(review): presumably the number of columns a tab advances to when
 * measuring indentation -- confirm against the users of this macro.
 */
#define PM_TAB_WHITESPACE_SIZE 8

/**
 * Minimum and maximum of two values. Both are plain macros, so each argument
 * may be evaluated more than once; do not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Helpful AST-related macros */
23/******************************************************************************/
24
/** Short alias for PM_NODE_FLAGS. */
#define FL PM_NODE_FLAGS

/** Short alias for PM_NODE_UPCAST. */
#define UP PM_NODE_UPCAST

/** The start/end pointers of a token, given a pointer to the token. */
#define PM_TOKEN_START(token_) ((token_)->start)
#define PM_TOKEN_END(token_) ((token_)->end)

/** The start/end pointers of a node's location, given a pointer to the node. */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_END(node_) (UP(node_)->location.end)

/** An empty location whose start and end both point at the parser's start. */
#define PM_LOCATION_NULL_VALUE(parser_) \
    ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })

/** A location spanning the given token. */
#define PM_LOCATION_TOKEN_VALUE(token_) \
    ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) })

/** A location spanning the given node. */
#define PM_LOCATION_NODE_VALUE(node_) \
    ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) })

/**
 * A location spanning the given token, or a zeroed location if the token's
 * type is PM_TOKEN_NOT_PROVIDED.
 */
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token_) \
    ((token_)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token_))
38
39/******************************************************************************/
40/* Lex mode manipulations */
41/******************************************************************************/
42
/**
 * Return the byte that increases the nesting level for the given opening
 * delimiter. Only the bracket-like openers `(`, `[`, `{` and `<` nest, in
 * which case the opener itself is returned; every other byte yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
59
/**
 * Return the byte that closes a literal opened with the given delimiter. The
 * bracket-like openers map to their matching closer; any other byte
 * terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
79
85static bool
86lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
87 lex_mode.prev = parser->lex_modes.current;
88 parser->lex_modes.index++;
89
90 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
92 if (parser->lex_modes.current == NULL) return false;
93
94 *parser->lex_modes.current = lex_mode;
95 } else {
96 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
97 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
98 }
99
100 return true;
101}
102
106static inline bool
107lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
108 uint8_t incrementor = lex_mode_incrementor(delimiter);
109 uint8_t terminator = lex_mode_terminator(delimiter);
110
111 pm_lex_mode_t lex_mode = {
112 .mode = PM_LEX_LIST,
113 .as.list = {
114 .nesting = 0,
115 .interpolation = interpolation,
116 .incrementor = incrementor,
117 .terminator = terminator
118 }
119 };
120
121 // These are the places where we need to split up the content of the list.
122 // We'll use strpbrk to find the first of these characters.
123 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
124 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
125 size_t index = 7;
126
127 // Now we'll add the terminator to the list of breakpoints. If the
128 // terminator is not already a NULL byte, add it to the list.
129 if (terminator != '\0') {
130 breakpoints[index++] = terminator;
131 }
132
133 // If interpolation is allowed, then we're going to check for the #
134 // character. Otherwise we'll only look for escapes and the terminator.
135 if (interpolation) {
136 breakpoints[index++] = '#';
137 }
138
139 // If there is an incrementor, then we'll check for that as well.
140 if (incrementor != '\0') {
141 breakpoints[index++] = incrementor;
142 }
143
144 parser->explicit_encoding = NULL;
145 return lex_mode_push(parser, lex_mode);
146}
147
153static inline bool
154lex_mode_push_list_eof(pm_parser_t *parser) {
155 return lex_mode_push_list(parser, false, '\0');
156}
157
161static inline bool
162lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
163 pm_lex_mode_t lex_mode = {
164 .mode = PM_LEX_REGEXP,
165 .as.regexp = {
166 .nesting = 0,
167 .incrementor = incrementor,
168 .terminator = terminator
169 }
170 };
171
172 // These are the places where we need to split up the content of the
173 // regular expression. We'll use strpbrk to find the first of these
174 // characters.
175 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
176 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
177 size_t index = 4;
178
179 // First we'll add the terminator.
180 if (terminator != '\0') {
181 breakpoints[index++] = terminator;
182 }
183
184 // Next, if there is an incrementor, then we'll check for that as well.
185 if (incrementor != '\0') {
186 breakpoints[index++] = incrementor;
187 }
188
189 parser->explicit_encoding = NULL;
190 return lex_mode_push(parser, lex_mode);
191}
192
196static inline bool
197lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
198 pm_lex_mode_t lex_mode = {
199 .mode = PM_LEX_STRING,
200 .as.string = {
201 .nesting = 0,
202 .interpolation = interpolation,
203 .label_allowed = label_allowed,
204 .incrementor = incrementor,
205 .terminator = terminator
206 }
207 };
208
209 // These are the places where we need to split up the content of the
210 // string. We'll use strpbrk to find the first of these characters.
211 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
212 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
213 size_t index = 3;
214
215 // Now add in the terminator. If the terminator is not already a NULL byte,
216 // then we'll add it.
217 if (terminator != '\0') {
218 breakpoints[index++] = terminator;
219 }
220
221 // If interpolation is allowed, then we're going to check for the #
222 // character. Otherwise we'll only look for escapes and the terminator.
223 if (interpolation) {
224 breakpoints[index++] = '#';
225 }
226
227 // If we have an incrementor, then we'll add that in as a breakpoint as
228 // well.
229 if (incrementor != '\0') {
230 breakpoints[index++] = incrementor;
231 }
232
233 parser->explicit_encoding = NULL;
234 return lex_mode_push(parser, lex_mode);
235}
236
242static inline bool
243lex_mode_push_string_eof(pm_parser_t *parser) {
244 return lex_mode_push_string(parser, false, false, '\0', '\0');
245}
246
252static void
253lex_mode_pop(pm_parser_t *parser) {
254 if (parser->lex_modes.index == 0) {
255 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
256 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
257 parser->lex_modes.index--;
258 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
259 } else {
260 parser->lex_modes.index--;
261 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
262 xfree(parser->lex_modes.current);
263 parser->lex_modes.current = prev;
264 }
265}
266
270static inline bool
271lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
272 return parser->lex_state & state;
273}
274
/**
 * The classification returned by lex_state_ignored_p for the current lex
 * state.
 */
typedef enum {
    /** Neither of the conditions below holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** The state is one of BEG/CLASS/FNAME/DOT and is not LABELED. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** The state, with LABEL masked out, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
280
281static inline pm_ignored_newline_type_t
282lex_state_ignored_p(pm_parser_t *parser) {
283 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
284
285 if (ignored) {
286 return PM_IGNORED_NEWLINE_ALL;
287 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
288 return PM_IGNORED_NEWLINE_PATTERN;
289 } else {
290 return PM_IGNORED_NEWLINE_NONE;
291 }
292}
293
294static inline bool
295lex_state_beg_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
297}
298
299static inline bool
300lex_state_arg_p(pm_parser_t *parser) {
301 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
302}
303
304static inline bool
305lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
306 if (parser->current.end >= parser->end) {
307 return false;
308 }
309 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
310}
311
312static inline bool
313lex_state_end_p(pm_parser_t *parser) {
314 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
315}
316
320static inline bool
321lex_state_operator_p(pm_parser_t *parser) {
322 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
323}
324
329static inline void
330lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
331 parser->lex_state = state;
332}
333
/**
 * Lex state debug logging is disabled unless PM_DEBUG_LOGGING has already
 * been defined (for example on the compiler command line) to a non-zero
 * value.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
341
342#if PM_DEBUG_LOGGING
343PRISM_ATTRIBUTE_UNUSED static void
344debug_state(pm_parser_t *parser) {
345 fprintf(stderr, "STATE: ");
346 bool first = true;
347
348 if (parser->lex_state == PM_LEX_STATE_NONE) {
349 fprintf(stderr, "NONE\n");
350 return;
351 }
352
353#define CHECK_STATE(state) \
354 if (parser->lex_state & state) { \
355 if (!first) fprintf(stderr, "|"); \
356 fprintf(stderr, "%s", #state); \
357 first = false; \
358 }
359
360 CHECK_STATE(PM_LEX_STATE_BEG)
361 CHECK_STATE(PM_LEX_STATE_END)
362 CHECK_STATE(PM_LEX_STATE_ENDARG)
363 CHECK_STATE(PM_LEX_STATE_ENDFN)
364 CHECK_STATE(PM_LEX_STATE_ARG)
365 CHECK_STATE(PM_LEX_STATE_CMDARG)
366 CHECK_STATE(PM_LEX_STATE_MID)
367 CHECK_STATE(PM_LEX_STATE_FNAME)
368 CHECK_STATE(PM_LEX_STATE_DOT)
369 CHECK_STATE(PM_LEX_STATE_CLASS)
370 CHECK_STATE(PM_LEX_STATE_LABEL)
371 CHECK_STATE(PM_LEX_STATE_LABELED)
372 CHECK_STATE(PM_LEX_STATE_FITEM)
373
374#undef CHECK_STATE
375
376 fprintf(stderr, "\n");
377}
378
379static void
380debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
381 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
382 debug_state(parser);
383 lex_state_set(parser, state);
384 fprintf(stderr, "Now: ");
385 debug_state(parser);
386 fprintf(stderr, "\n");
387}
388
389#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
390#endif
391
392/******************************************************************************/
393/* Command-line macro helpers */
394/******************************************************************************/
395
/** Non-zero if the given option bit(s) are set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser_, option_) ((parser_)->command_line & (option_))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_X)
416
417/******************************************************************************/
418/* Diagnostic-related functions */
419/******************************************************************************/
420
424static inline void
425pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
426 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
427}
428
/**
 * Append a formatted error, covering the byte range [start, end), to the
 * parser's error list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format as the format arguments for the
 * diagnostic.
 *
 * The parser argument is parenthesized so that any pointer-valued expression
 * can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
434
439static inline void
440pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
441 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
442}
443
/**
 * Append a formatted error located at the given location (passed as a
 * pointer) to the parser's error list.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (location_)->start, (location_)->end, diag_id, __VA_ARGS__)
450
455static inline void
456pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
457 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
458}
459
/**
 * Append a formatted error located at the given node (passed as a pointer)
 * to the parser's error list.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (node_)->location.start, (node_)->location.end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error located at the given node, passing the length of
 * the node's source (as an int) and a pointer to its first byte as the
 * format arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node_, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT(parser, node_, diag_id, (int) ((node_)->location.end - (node_)->location.start), (const char *) (node_)->location.start)
473
478static inline void
479pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
480 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
481}
482
487static inline void
488pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
489 pm_parser_err(parser, token->start, token->end, diag_id);
490}
491
/**
 * Append a formatted error located at the given token to the parser's error
 * list. Note that the token is passed by value here, not as a pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (token_).start, (token_).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error located at the given token (passed by value),
 * passing the length of the token's source (as an int) and a pointer to its
 * first byte as the format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token_, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token_, diag_id, (int) ((token_).end - (token_).start), (const char *) (token_).start)
505
509static inline void
510pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
511 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
512}
513
518static inline void
519pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
520 pm_parser_warn(parser, token->start, token->end, diag_id);
521}
522
527static inline void
528pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
529 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
530}
531
/**
 * Append a formatted warning, covering the byte range [start, end), to the
 * parser's warning list. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format as the format arguments for the
 * diagnostic.
 *
 * The parser argument is parenthesized so that any pointer-valued expression
 * can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning located at the given token to the parser's
 * warning list. Note that the token is passed by value, not as a pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning located at the given token (passed by value),
 * passing the length of the token's source (as an int) and a pointer to its
 * first byte as the format arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning located at the given node (passed as a pointer)
 * to the parser's warning list.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
558
564static void
565pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
566 PM_PARSER_ERR_FORMAT(
567 parser,
568 ident_start,
569 ident_start + ident_length,
570 PM_ERR_HEREDOC_TERM,
571 (int) ident_length,
572 (const char *) ident_start
573 );
574}
575
576/******************************************************************************/
577/* Scope-related functions */
578/******************************************************************************/
579
583static bool
584pm_parser_scope_push(pm_parser_t *parser, bool closed) {
585 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
586 if (scope == NULL) return false;
587
588 *scope = (pm_scope_t) {
589 .previous = parser->current_scope,
590 .locals = { 0 },
591 .parameters = PM_SCOPE_PARAMETERS_NONE,
592 .implicit_parameters = { 0 },
593 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
594 .closed = closed
595 };
596
597 parser->current_scope = scope;
598 return true;
599}
600
605static bool
606pm_parser_scope_toplevel_p(pm_parser_t *parser) {
607 pm_scope_t *scope = parser->current_scope;
608
609 do {
610 if (scope->previous == NULL) return true;
611 if (scope->closed) return false;
612 } while ((scope = scope->previous) != NULL);
613
614 assert(false && "unreachable");
615 return true;
616}
617
621static pm_scope_t *
622pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
623 pm_scope_t *scope = parser->current_scope;
624
625 while (depth-- > 0) {
626 assert(scope != NULL);
627 scope = scope->previous;
628 }
629
630 return scope;
631}
632
/**
 * The possible outcomes of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** The nearest closed scope declares the parameter and no open scope in between does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** The nearest closed scope declares the parameter, but so does an open scope in between. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** The nearest closed scope (if any) does not declare the parameter. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
638
639static pm_scope_forwarding_param_check_result_t
640pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
641 pm_scope_t *scope = parser->current_scope;
642 bool conflict = false;
643
644 while (scope != NULL) {
645 if (scope->parameters & mask) {
646 if (scope->closed) {
647 if (conflict) {
648 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
649 } else {
650 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
651 }
652 }
653
654 conflict = true;
655 }
656
657 if (scope->closed) break;
658 scope = scope->previous;
659 }
660
661 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
662}
663
664static void
665pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
666 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
668 // Pass.
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
672 break;
673 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
674 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
675 break;
676 }
677}
678
679static void
680pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
681 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
683 // Pass.
684 break;
685 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
686 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
687 break;
688 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
689 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
690 break;
691 }
692}
693
694static void
695pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
696 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
697 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
698 // Pass.
699 break;
700 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
701 // This shouldn't happen, because ... is not allowed in the
702 // declaration of blocks. If we get here, we assume we already have
703 // an error for this.
704 break;
705 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
706 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
707 break;
708 }
709}
710
711static void
712pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
713 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
714 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
715 // Pass.
716 break;
717 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
718 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
719 break;
720 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
721 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
722 break;
723 }
724}
725
730pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
731 return parser->current_scope->shareable_constant;
732}
733
738static void
739pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
740 pm_scope_t *scope = parser->current_scope;
741
742 do {
743 scope->shareable_constant = shareable_constant;
744 } while (!scope->closed && (scope = scope->previous) != NULL);
745}
746
747/******************************************************************************/
748/* Local variable-related functions */
749/******************************************************************************/
750
/**
 * The capacity at which a set of locals switches from being stored as a
 * linearly scanned list to being stored as an open-addressed hash table:
 * capacities below this value use the list.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
755
756static void
757pm_locals_free(pm_locals_t *locals) {
758 if (locals->capacity > 0) {
759 xfree(locals->locals);
760 }
761}
762
767static uint32_t
768pm_locals_hash(pm_constant_id_t name) {
769 name = ((name >> 16) ^ name) * 0x45d9f3b;
770 name = ((name >> 16) ^ name) * 0x45d9f3b;
771 name = (name >> 16) ^ name;
772 return name;
773}
774
779static void
780pm_locals_resize(pm_locals_t *locals) {
781 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
782 assert(next_capacity > locals->capacity);
783
784 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
785 if (next_locals == NULL) abort();
786
787 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
788 if (locals->size > 0) {
789 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
790 }
791 } else {
792 // If we just switched from a list to a hash, then we need to fill in
793 // the hash values of all of the locals.
794 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
795 uint32_t mask = next_capacity - 1;
796
797 for (uint32_t index = 0; index < locals->capacity; index++) {
798 pm_local_t *local = &locals->locals[index];
799
800 if (local->name != PM_CONSTANT_ID_UNSET) {
801 if (hash_needed) local->hash = pm_locals_hash(local->name);
802
803 uint32_t hash = local->hash;
804 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
805 next_locals[hash & mask] = *local;
806 }
807 }
808 }
809
810 pm_locals_free(locals);
811 locals->locals = next_locals;
812 locals->capacity = next_capacity;
813}
814
830static bool
831pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
832 if (locals->size >= (locals->capacity / 4 * 3)) {
833 pm_locals_resize(locals);
834 }
835
836 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
837 for (uint32_t index = 0; index < locals->capacity; index++) {
838 pm_local_t *local = &locals->locals[index];
839
840 if (local->name == PM_CONSTANT_ID_UNSET) {
841 *local = (pm_local_t) {
842 .name = name,
843 .location = { .start = start, .end = end },
844 .index = locals->size++,
845 .reads = reads,
846 .hash = 0
847 };
848 return true;
849 } else if (local->name == name) {
850 return false;
851 }
852 }
853 } else {
854 uint32_t mask = locals->capacity - 1;
855 uint32_t hash = pm_locals_hash(name);
856 uint32_t initial_hash = hash;
857
858 do {
859 pm_local_t *local = &locals->locals[hash & mask];
860
861 if (local->name == PM_CONSTANT_ID_UNSET) {
862 *local = (pm_local_t) {
863 .name = name,
864 .location = { .start = start, .end = end },
865 .index = locals->size++,
866 .reads = reads,
867 .hash = initial_hash
868 };
869 return true;
870 } else if (local->name == name) {
871 return false;
872 } else {
873 hash++;
874 }
875 } while ((hash & mask) != initial_hash);
876 }
877
878 assert(false && "unreachable");
879 return true;
880}
881
886static uint32_t
887pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
888 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
889 for (uint32_t index = 0; index < locals->size; index++) {
890 pm_local_t *local = &locals->locals[index];
891 if (local->name == name) return index;
892 }
893 } else {
894 uint32_t mask = locals->capacity - 1;
895 uint32_t hash = pm_locals_hash(name);
896 uint32_t initial_hash = hash & mask;
897
898 do {
899 pm_local_t *local = &locals->locals[hash & mask];
900
901 if (local->name == PM_CONSTANT_ID_UNSET) {
902 return UINT32_MAX;
903 } else if (local->name == name) {
904 return hash & mask;
905 } else {
906 hash++;
907 }
908 } while ((hash & mask) != initial_hash);
909 }
910
911 return UINT32_MAX;
912}
913
918static void
919pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
920 uint32_t index = pm_locals_find(locals, name);
921 assert(index != UINT32_MAX);
922
923 pm_local_t *local = &locals->locals[index];
924 assert(local->reads < UINT32_MAX);
925
926 local->reads++;
927}
928
933static void
934pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
935 uint32_t index = pm_locals_find(locals, name);
936 assert(index != UINT32_MAX);
937
938 pm_local_t *local = &locals->locals[index];
939 assert(local->reads > 0);
940
941 local->reads--;
942}
943
947static uint32_t
948pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
949 uint32_t index = pm_locals_find(locals, name);
950 assert(index != UINT32_MAX);
951
952 return locals->locals[index].reads;
953}
954
963static void
964pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
965 pm_constant_id_list_init_capacity(list, locals->size);
966
967 // If we're still below the threshold for switching to a hash, then we only
968 // need to loop over the locals until we hit the size because the locals are
969 // stored in a list.
970 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
971
972 // We will only warn for unused variables if we're not at the top level, or
973 // if we're parsing a file outside of eval or -e.
974 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
975
976 for (uint32_t index = 0; index < capacity; index++) {
977 pm_local_t *local = &locals->locals[index];
978
979 if (local->name != PM_CONSTANT_ID_UNSET) {
980 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
981
982 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
983 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
984
985 if (constant->length >= 1 && *constant->start != '_') {
986 PM_PARSER_WARN_FORMAT(
987 parser,
988 local->location.start,
989 local->location.end,
990 PM_WARN_UNUSED_LOCAL_VARIABLE,
991 (int) constant->length,
992 (const char *) constant->start
993 );
994 }
995 }
996 }
997 }
998}
999
1000/******************************************************************************/
1001/* Node-related functions */
1002/******************************************************************************/
1003
1007static inline pm_constant_id_t
1008pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1009 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1010}
1011
1015static inline pm_constant_id_t
1016pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1017 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1018}
1019
1023static inline pm_constant_id_t
1024pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1025 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1026}
1027
1031static inline pm_constant_id_t
1032pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1033 return pm_parser_constant_id_location(parser, token->start, token->end);
1034}
1035
1040static inline pm_constant_id_t
1041pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1042 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1043}
1044
1050static pm_node_t *
1051pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1052 pm_node_t *void_node = NULL;
1053
1054 while (node != NULL) {
1055 switch (PM_NODE_TYPE(node)) {
1056 case PM_RETURN_NODE:
1057 case PM_BREAK_NODE:
1058 case PM_NEXT_NODE:
1059 case PM_REDO_NODE:
1060 case PM_RETRY_NODE:
1061 case PM_MATCH_REQUIRED_NODE:
1062 return void_node != NULL ? void_node : node;
1063 case PM_MATCH_PREDICATE_NODE:
1064 return NULL;
1065 case PM_BEGIN_NODE: {
1066 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1067
1068 if (cast->ensure_clause != NULL) {
1069 if (cast->rescue_clause != NULL) {
1070 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1071 if (vn != NULL) return vn;
1072 }
1073
1074 if (cast->statements != NULL) {
1075 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1076 if (vn != NULL) return vn;
1077 }
1078
1079 node = UP(cast->ensure_clause);
1080 } else if (cast->rescue_clause != NULL) {
1081 if (cast->statements == NULL) return NULL;
1082
1083 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1084 if (vn == NULL) return NULL;
1085 if (void_node == NULL) void_node = vn;
1086
1087 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1088 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1089 if (vn == NULL) {
1090 void_node = NULL;
1091 break;
1092 }
1093 if (void_node == NULL) {
1094 void_node = vn;
1095 }
1096 }
1097
1098 if (cast->else_clause != NULL) {
1099 node = UP(cast->else_clause);
1100 } else {
1101 return void_node;
1102 }
1103 } else {
1104 node = UP(cast->statements);
1105 }
1106
1107 break;
1108 }
1109 case PM_ENSURE_NODE: {
1110 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1111 node = UP(cast->statements);
1112 break;
1113 }
1114 case PM_PARENTHESES_NODE: {
1116 node = UP(cast->body);
1117 break;
1118 }
1119 case PM_STATEMENTS_NODE: {
1121 node = cast->body.nodes[cast->body.size - 1];
1122 break;
1123 }
1124 case PM_IF_NODE: {
1125 pm_if_node_t *cast = (pm_if_node_t *) node;
1126 if (cast->statements == NULL || cast->subsequent == NULL) {
1127 return NULL;
1128 }
1129 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1130 if (vn == NULL) {
1131 return NULL;
1132 }
1133 if (void_node == NULL) {
1134 void_node = vn;
1135 }
1136 node = cast->subsequent;
1137 break;
1138 }
1139 case PM_UNLESS_NODE: {
1140 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1141 if (cast->statements == NULL || cast->else_clause == NULL) {
1142 return NULL;
1143 }
1144 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1145 if (vn == NULL) {
1146 return NULL;
1147 }
1148 if (void_node == NULL) {
1149 void_node = vn;
1150 }
1151 node = UP(cast->else_clause);
1152 break;
1153 }
1154 case PM_ELSE_NODE: {
1155 pm_else_node_t *cast = (pm_else_node_t *) node;
1156 node = UP(cast->statements);
1157 break;
1158 }
1159 case PM_AND_NODE: {
1160 pm_and_node_t *cast = (pm_and_node_t *) node;
1161 node = cast->left;
1162 break;
1163 }
1164 case PM_OR_NODE: {
1165 pm_or_node_t *cast = (pm_or_node_t *) node;
1166 node = cast->left;
1167 break;
1168 }
1169 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1171
1172 pm_scope_t *scope = parser->current_scope;
1173 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1174
1175 pm_locals_read(&scope->locals, cast->name);
1176 return NULL;
1177 }
1178 default:
1179 return NULL;
1180 }
1181 }
1182
1183 return NULL;
1184}
1185
1186static inline void
1187pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1188 pm_node_t *void_node = pm_check_value_expression(parser, node);
1189 if (void_node != NULL) {
1190 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1191 }
1192}
1193
1197static void
1198pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1199 const char *type = NULL;
1200 int length = 0;
1201
1202 switch (PM_NODE_TYPE(node)) {
1203 case PM_BACK_REFERENCE_READ_NODE:
1204 case PM_CLASS_VARIABLE_READ_NODE:
1205 case PM_GLOBAL_VARIABLE_READ_NODE:
1206 case PM_INSTANCE_VARIABLE_READ_NODE:
1207 case PM_LOCAL_VARIABLE_READ_NODE:
1208 case PM_NUMBERED_REFERENCE_READ_NODE:
1209 type = "a variable";
1210 length = 10;
1211 break;
1212 case PM_CALL_NODE: {
1213 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1214 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1215
1216 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1217 switch (message->length) {
1218 case 1:
1219 switch (message->start[0]) {
1220 case '+':
1221 case '-':
1222 case '*':
1223 case '/':
1224 case '%':
1225 case '|':
1226 case '^':
1227 case '&':
1228 case '>':
1229 case '<':
1230 type = (const char *) message->start;
1231 length = 1;
1232 break;
1233 }
1234 break;
1235 case 2:
1236 switch (message->start[1]) {
1237 case '=':
1238 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1239 type = (const char *) message->start;
1240 length = 2;
1241 }
1242 break;
1243 case '@':
1244 if (message->start[0] == '+' || message->start[0] == '-') {
1245 type = (const char *) message->start;
1246 length = 2;
1247 }
1248 break;
1249 case '*':
1250 if (message->start[0] == '*') {
1251 type = (const char *) message->start;
1252 length = 2;
1253 }
1254 break;
1255 }
1256 break;
1257 case 3:
1258 if (memcmp(message->start, "<=>", 3) == 0) {
1259 type = "<=>";
1260 length = 3;
1261 }
1262 break;
1263 }
1264
1265 break;
1266 }
1267 case PM_CONSTANT_PATH_NODE:
1268 type = "::";
1269 length = 2;
1270 break;
1271 case PM_CONSTANT_READ_NODE:
1272 type = "a constant";
1273 length = 10;
1274 break;
1275 case PM_DEFINED_NODE:
1276 type = "defined?";
1277 length = 8;
1278 break;
1279 case PM_FALSE_NODE:
1280 type = "false";
1281 length = 5;
1282 break;
1283 case PM_FLOAT_NODE:
1284 case PM_IMAGINARY_NODE:
1285 case PM_INTEGER_NODE:
1286 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1287 case PM_INTERPOLATED_STRING_NODE:
1288 case PM_RATIONAL_NODE:
1289 case PM_REGULAR_EXPRESSION_NODE:
1290 case PM_SOURCE_ENCODING_NODE:
1291 case PM_SOURCE_FILE_NODE:
1292 case PM_SOURCE_LINE_NODE:
1293 case PM_STRING_NODE:
1294 case PM_SYMBOL_NODE:
1295 type = "a literal";
1296 length = 9;
1297 break;
1298 case PM_NIL_NODE:
1299 type = "nil";
1300 length = 3;
1301 break;
1302 case PM_RANGE_NODE: {
1303 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1304
1305 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1306 type = "...";
1307 length = 3;
1308 } else {
1309 type = "..";
1310 length = 2;
1311 }
1312
1313 break;
1314 }
1315 case PM_SELF_NODE:
1316 type = "self";
1317 length = 4;
1318 break;
1319 case PM_TRUE_NODE:
1320 type = "true";
1321 length = 4;
1322 break;
1323 default:
1324 break;
1325 }
1326
1327 if (type != NULL) {
1328 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1329 }
1330}
1331
1336static void
1337pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1338 assert(node->body.size > 0);
1339 const size_t size = node->body.size - (last_value ? 1 : 0);
1340 for (size_t index = 0; index < size; index++) {
1341 pm_void_statement_check(parser, node->body.nodes[index]);
1342 }
1343}
1344
/**
 * The kind of predicate being processed by pm_conditional_predicate(). It
 * selects the wording of the "literal in ..." warnings.
 */
typedef enum {
    /** Literal warnings are worded with "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** Literal warnings are worded with "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** No literal warning is emitted for this kind. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1355
1359static void
1360pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1361 switch (type) {
1362 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1363 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1364 break;
1365 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1366 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1367 break;
1368 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1369 break;
1370 }
1371}
1372
1377static bool
1378pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1379 switch (PM_NODE_TYPE(node)) {
1380 case PM_ARRAY_NODE: {
1381 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1382
1383 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1384 for (size_t index = 0; index < cast->elements.size; index++) {
1385 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1386 }
1387
1388 return true;
1389 }
1390 case PM_HASH_NODE: {
1391 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1392
1393 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1394 for (size_t index = 0; index < cast->elements.size; index++) {
1395 const pm_node_t *element = cast->elements.nodes[index];
1396 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1397
1398 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1399 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1400 }
1401
1402 return true;
1403 }
1404 case PM_FALSE_NODE:
1405 case PM_FLOAT_NODE:
1406 case PM_IMAGINARY_NODE:
1407 case PM_INTEGER_NODE:
1408 case PM_NIL_NODE:
1409 case PM_RATIONAL_NODE:
1410 case PM_REGULAR_EXPRESSION_NODE:
1411 case PM_SOURCE_ENCODING_NODE:
1412 case PM_SOURCE_FILE_NODE:
1413 case PM_SOURCE_LINE_NODE:
1414 case PM_STRING_NODE:
1415 case PM_SYMBOL_NODE:
1416 case PM_TRUE_NODE:
1417 return true;
1418 default:
1419 return false;
1420 }
1421}
1422
1427static inline void
1428pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1429 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1430 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1431 }
1432}
1433
1446static void
1447pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1448 switch (PM_NODE_TYPE(node)) {
1449 case PM_AND_NODE: {
1450 pm_and_node_t *cast = (pm_and_node_t *) node;
1451 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1452 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1453 break;
1454 }
1455 case PM_OR_NODE: {
1456 pm_or_node_t *cast = (pm_or_node_t *) node;
1457 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1458 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1459 break;
1460 }
1461 case PM_PARENTHESES_NODE: {
1463
1464 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1465 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1466 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1467 }
1468
1469 break;
1470 }
1471 case PM_BEGIN_NODE: {
1472 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1473 if (cast->statements != NULL) {
1474 pm_statements_node_t *statements = cast->statements;
1475 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1476 }
1477 break;
1478 }
1479 case PM_RANGE_NODE: {
1480 pm_range_node_t *cast = (pm_range_node_t *) node;
1481
1482 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1483 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1484
1485 // Here we change the range node into a flip flop node. We can do
1486 // this since the nodes are exactly the same except for the type.
1487 // We're only asserting against the size when we should probably
1488 // assert against the entire layout, but we'll assume tests will
1489 // catch this.
1490 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1491 node->type = PM_FLIP_FLOP_NODE;
1492
1493 break;
1494 }
1495 case PM_REGULAR_EXPRESSION_NODE:
1496 // Here we change the regular expression node into a match last line
1497 // node. We can do this since the nodes are exactly the same except
1498 // for the type.
1500 node->type = PM_MATCH_LAST_LINE_NODE;
1501
1502 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1503 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1504 }
1505
1506 break;
1507 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1508 // Here we change the interpolated regular expression node into an
1509 // interpolated match last line node. We can do this since the nodes
1510 // are exactly the same except for the type.
1512 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1513
1514 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1515 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1516 }
1517
1518 break;
1519 case PM_INTEGER_NODE:
1520 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1521 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1522 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1523 }
1524 } else {
1525 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1526 }
1527 break;
1528 case PM_STRING_NODE:
1529 case PM_SOURCE_FILE_NODE:
1530 case PM_INTERPOLATED_STRING_NODE:
1531 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1532 break;
1533 case PM_SYMBOL_NODE:
1534 case PM_INTERPOLATED_SYMBOL_NODE:
1535 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1536 break;
1537 case PM_SOURCE_LINE_NODE:
1538 case PM_SOURCE_ENCODING_NODE:
1539 case PM_FLOAT_NODE:
1540 case PM_RATIONAL_NODE:
1541 case PM_IMAGINARY_NODE:
1542 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1543 break;
1544 case PM_CLASS_VARIABLE_WRITE_NODE:
1545 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1546 break;
1547 case PM_CONSTANT_WRITE_NODE:
1548 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1549 break;
1550 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1551 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1552 break;
1553 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1554 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1555 break;
1556 case PM_LOCAL_VARIABLE_WRITE_NODE:
1557 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1558 break;
1559 case PM_MULTI_WRITE_NODE:
1560 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1561 break;
1562 default:
1563 break;
1564 }
1565}
1566
1575static inline pm_token_t
1576not_provided(pm_parser_t *parser) {
1577 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1578}
1579
1602
1606static inline const uint8_t *
1607pm_arguments_end(pm_arguments_t *arguments) {
1608 if (arguments->block != NULL) {
1609 const uint8_t *end = arguments->block->location.end;
1610 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1611 end = arguments->closing_loc.end;
1612 }
1613 return end;
1614 }
1615 if (arguments->closing_loc.start != NULL) {
1616 return arguments->closing_loc.end;
1617 }
1618 if (arguments->arguments != NULL) {
1619 return arguments->arguments->base.location.end;
1620 }
1621 return arguments->closing_loc.end;
1622}
1623
1628static void
1629pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1630 // First, check that we have arguments and that we don't have a closing
1631 // location for them.
1632 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1633 return;
1634 }
1635
1636 // Next, check that we don't have a single parentheses argument. This would
1637 // look like:
1638 //
1639 // foo (1) {}
1640 //
1641 // In this case, it's actually okay for the block to be attached to the
1642 // call, even though it looks like it's attached to the argument.
1643 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1644 return;
1645 }
1646
1647 // If we didn't hit a case before this check, then at this point we need to
1648 // add a syntax error.
1649 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1650}
1651
1652/******************************************************************************/
1653/* Basic character checks */
1654/******************************************************************************/
1655
1662static inline size_t
1663char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1664 if (n <= 0) return 0;
1665
1666 if (parser->encoding_changed) {
1667 size_t width;
1668
1669 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1670 return width;
1671 } else if (*b == '_') {
1672 return 1;
1673 } else if (*b >= 0x80) {
1674 return parser->encoding->char_width(b, n);
1675 } else {
1676 return 0;
1677 }
1678 } else if (*b < 0x80) {
1679 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1680 } else {
1681 return pm_encoding_utf_8_char_width(b, n);
1682 }
1683}
1684
1689static inline size_t
1690char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1691 if (n <= 0) {
1692 return 0;
1693 } else if (*b < 0x80) {
1694 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1695 } else {
1696 return pm_encoding_utf_8_char_width(b, n);
1697 }
1698}
1699
1705static inline size_t
1706char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1707 if (n <= 0) {
1708 return 0;
1709 } else if (parser->encoding_changed) {
1710 size_t width;
1711
1712 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1713 return width;
1714 } else if (*b == '_') {
1715 return 1;
1716 } else if (*b >= 0x80) {
1717 return parser->encoding->char_width(b, n);
1718 } else {
1719 return 0;
1720 }
1721 } else {
1722 return char_is_identifier_utf8(b, n);
1723 }
1724}
1725
// The punctuation characters that may follow a $ in a global variable name are
// stored as a bitset covering the ASCII range 0x20..0x7e, split into 32-bit
// words. Word 0 holds 0x20..0x3f, word 1 holds 0x40..0x5f and word 2 holds
// 0x60..0x7e; within a word, the bit index is the character code modulo 32.
#define PUNCT_BIT(c, word) (((c) / 32 - 1 == word) ? (1U << ((c) % 32)) : 0)
#define PUNCT_WORD(word) ( \
    PUNCT_BIT('!', word) | PUNCT_BIT('\"', word) | PUNCT_BIT('$', word) | PUNCT_BIT('&', word) | \
    PUNCT_BIT('\'', word) | PUNCT_BIT('*', word) | PUNCT_BIT('+', word) | PUNCT_BIT(',', word) | \
    PUNCT_BIT('.', word) | PUNCT_BIT('/', word) | PUNCT_BIT('0', word) | PUNCT_BIT(':', word) | \
    PUNCT_BIT(';', word) | PUNCT_BIT('<', word) | PUNCT_BIT('=', word) | PUNCT_BIT('>', word) | \
    PUNCT_BIT('?', word) | PUNCT_BIT('@', word) | PUNCT_BIT('\\', word) | PUNCT_BIT('`', word) | \
    PUNCT_BIT('~', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT_WORD(0),
    PUNCT_WORD(1),
    PUNCT_WORD(2)
};

#undef PUNCT_BIT
#undef PUNCT_WORD

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    // Anything at or below the space character, or above '~', is outside the
    // table.
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int code = (unsigned int) b;
    const unsigned int word = (code - 0x20) / 32;
    const unsigned int bit = code % 32;

    return ((pm_global_name_punctuation_hash[word] >> bit) & 1U) != 0;
}
1750
1751static inline bool
1752token_is_setter_name(pm_token_t *token) {
1753 return (
1754 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1755 ((token->type == PM_TOKEN_IDENTIFIER) &&
1756 (token->end - token->start >= 2) &&
1757 (token->end[-1] == '='))
1758 );
1759}
1760
/**
 * Return true if the first `length` bytes of `source` spell one of the keywords
 * listed below. Candidates are grouped by length, so only keywords of exactly
 * the given length are compared.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define MATCHES(keyword) (memcmp(source, keyword, length) == 0)

    bool found;

    switch (length) {
        case 2:
            found = MATCHES("do") || MATCHES("if") || MATCHES("in") || MATCHES("or");
            break;
        case 3:
            found = MATCHES("and") || MATCHES("def") || MATCHES("end") || MATCHES("for") ||
                MATCHES("nil") || MATCHES("not");
            break;
        case 4:
            found = MATCHES("case") || MATCHES("else") || MATCHES("next") || MATCHES("redo") ||
                MATCHES("self") || MATCHES("then") || MATCHES("true") || MATCHES("when");
            break;
        case 5:
            found = MATCHES("alias") || MATCHES("begin") || MATCHES("break") || MATCHES("class") ||
                MATCHES("elsif") || MATCHES("false") || MATCHES("retry") || MATCHES("super") ||
                MATCHES("undef") || MATCHES("until") || MATCHES("while") || MATCHES("yield");
            break;
        case 6:
            found = MATCHES("ensure") || MATCHES("module") || MATCHES("rescue") || MATCHES("return") ||
                MATCHES("unless");
            break;
        case 8:
            found = MATCHES("__LINE__") || MATCHES("__FILE__");
            break;
        case 12:
            found = MATCHES("__ENCODING__");
            break;
        default:
            found = false;
            break;
    }

#undef MATCHES

    return found;
}
1831
1832/******************************************************************************/
1833/* Node flag handling functions */
1834/******************************************************************************/
1835
1839static inline void
1840pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1841 node->flags |= flag;
1842}
1843
1847static inline void
1848pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1849 node->flags &= (pm_node_flags_t) ~flag;
1850}
1851
1855static inline void
1856pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1857 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1858 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1859 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1860 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1861 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1862 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1863 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1864 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1865
1866 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1867}
1868
1869/******************************************************************************/
1870/* Node creation functions */
1871/******************************************************************************/
1872
1878#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1879
1883static inline pm_node_flags_t
1884pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1885 pm_node_flags_t flags = 0;
1886
1887 if (closing->type == PM_TOKEN_REGEXP_END) {
1888 pm_buffer_t unknown_flags = { 0 };
1889
1890 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1891 switch (*flag) {
1892 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1893 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1894 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1895 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1896
1897 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1898 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1899 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1900 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1901
1902 default: pm_buffer_append_byte(&unknown_flags, *flag);
1903 }
1904 }
1905
1906 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1907 if (unknown_flags_length != 0) {
1908 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1909 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1910 }
1911 pm_buffer_free(&unknown_flags);
1912 }
1913
1914 return flags;
1915}
1916
1917#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1918
1919static pm_statements_node_t *
1920pm_statements_node_create(pm_parser_t *parser);
1921
1922static void
1923pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1924
1925static size_t
1926pm_statements_node_body_length(pm_statements_node_t *node);
1927
1932static inline void *
1933pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1934 void *memory = xcalloc(1, size);
1935 if (memory == NULL) {
1936 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1937 abort();
1938 }
1939 return memory;
1940}
1941
/**
 * Allocate storage for a node of the given struct type and yield a pointer of
 * that type.
 */
#define PM_NODE_ALLOC(parser_, type_) ((type_ *) pm_node_alloc(parser_, sizeof(type_)))

/**
 * Build the base pm_node_t for a new node with the given type, flags and
 * source range. Every expansion increments the parser's node_id counter and
 * uses the incremented value as the node's id.
 */
#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
    .type = (type_), \
    .flags = (flags_), \
    .node_id = ++(parser_)->node_id, \
    .location = { .start = (start_), .end = (end_) } \
}

// Variants of PM_NODE_INIT that differ only in where the location comes from:
// no location at all, an empty location at the start of the source, a single
// token, or a single node.
#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)
#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)
#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))
#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))

// Variants whose location starts at the first token/node and ends at the
// second token/node.
#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))
#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))
#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))
#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
1959
1963static pm_missing_node_t *
1964pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1965 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1966
1967 *node = (pm_missing_node_t) {
1968 .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
1969 };
1970
1971 return node;
1972}
1973
1977static pm_alias_global_variable_node_t *
1978pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1979 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1980 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1981
1982 *node = (pm_alias_global_variable_node_t) {
1983 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
1984 .new_name = new_name,
1985 .old_name = old_name,
1986 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1987 };
1988
1989 return node;
1990}
1991
1995static pm_alias_method_node_t *
1996pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1997 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1998 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1999
2000 *node = (pm_alias_method_node_t) {
2001 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
2002 .new_name = new_name,
2003 .old_name = old_name,
2004 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2005 };
2006
2007 return node;
2008}
2009
2013static pm_alternation_pattern_node_t *
2014pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2015 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2016
2017 *node = (pm_alternation_pattern_node_t) {
2018 .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
2019 .left = left,
2020 .right = right,
2021 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2022 };
2023
2024 return node;
2025}
2026
2030static pm_and_node_t *
2031pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2032 pm_assert_value_expression(parser, left);
2033
2034 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2035
2036 *node = (pm_and_node_t) {
2037 .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
2038 .left = left,
2039 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2040 .right = right
2041 };
2042
2043 return node;
2044}
2045
2049static pm_arguments_node_t *
2050pm_arguments_node_create(pm_parser_t *parser) {
2051 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2052
2053 *node = (pm_arguments_node_t) {
2054 .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0),
2055 .arguments = { 0 }
2056 };
2057
2058 return node;
2059}
2060
2064static size_t
2065pm_arguments_node_size(pm_arguments_node_t *node) {
2066 return node->arguments.size;
2067}
2068
2072static void
2073pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2074 if (pm_arguments_node_size(node) == 0) {
2075 node->base.location.start = argument->location.start;
2076 }
2077
2078 if (node->base.location.end < argument->location.end) {
2079 node->base.location.end = argument->location.end;
2080 }
2081
2082 pm_node_list_append(&node->arguments, argument);
2083
2084 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2085 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2086 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2087 } else {
2088 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2089 }
2090 }
2091}
2092
2096static pm_array_node_t *
2097pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2098 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2099
2100 *node = (pm_array_node_t) {
2101 .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
2102 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104 .elements = { 0 }
2105 };
2106
2107 return node;
2108}
2109
2113static inline void
2114pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115 if (!node->elements.size && !node->opening_loc.start) {
2116 node->base.location.start = element->location.start;
2117 }
2118
2119 pm_node_list_append(&node->elements, element);
2120 node->base.location.end = element->location.end;
2121
2122 // If the element is not a static literal, then the array is not a static
2123 // literal. Turn that flag off.
2124 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2126 }
2127
2128 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130 }
2131}
2132
2136static void
2137pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139 node->base.location.end = closing->end;
2140 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141}
2142
2147static pm_array_pattern_node_t *
2148pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150
2151 *node = (pm_array_pattern_node_t) {
2152 .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2153 .constant = NULL,
2154 .rest = NULL,
2155 .requireds = { 0 },
2156 .posts = { 0 },
2157 .opening_loc = { 0 },
2158 .closing_loc = { 0 }
2159 };
2160
2161 // For now we're going to just copy over each pointer manually. This could be
2162 // much more efficient, as we could instead resize the node list.
2163 bool found_rest = false;
2164 pm_node_t *child;
2165
2166 PM_NODE_LIST_FOREACH(nodes, index, child) {
2167 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2168 node->rest = child;
2169 found_rest = true;
2170 } else if (found_rest) {
2171 pm_node_list_append(&node->posts, child);
2172 } else {
2173 pm_node_list_append(&node->requireds, child);
2174 }
2175 }
2176
2177 return node;
2178}
2179
2183static pm_array_pattern_node_t *
2184pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2185 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2186
2187 *node = (pm_array_pattern_node_t) {
2188 .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
2189 .constant = NULL,
2190 .rest = rest,
2191 .requireds = { 0 },
2192 .posts = { 0 },
2193 .opening_loc = { 0 },
2194 .closing_loc = { 0 }
2195 };
2196
2197 return node;
2198}
2199
2204static pm_array_pattern_node_t *
2205pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2206 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2207
2208 *node = (pm_array_pattern_node_t) {
2209 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
2210 .constant = constant,
2211 .rest = NULL,
2212 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2213 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2214 .requireds = { 0 },
2215 .posts = { 0 }
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
2231 .constant = NULL,
2232 .rest = NULL,
2233 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2234 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2235 .requireds = { 0 },
2236 .posts = { 0 }
2237 };
2238
2239 return node;
2240}
2241
2242static inline void
2243pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2244 pm_node_list_append(&node->requireds, inner);
2245}
2246
2250static pm_assoc_node_t *
2251pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2252 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2253 const uint8_t *end;
2254
2255 if (value != NULL && value->location.end > key->location.end) {
2256 end = value->location.end;
2257 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2258 end = operator->end;
2259 } else {
2260 end = key->location.end;
2261 }
2262
2263 // Hash string keys will be frozen, so we can mark them as frozen here so
2264 // that the compiler picks them up and also when we check for static literal
2265 // on the keys it gets factored in.
2266 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2267 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2268 }
2269
2270 // If the key and value of this assoc node are both static literals, then
2271 // we can mark this node as a static literal.
2272 pm_node_flags_t flags = 0;
2273 if (
2274 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2275 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2276 ) {
2277 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2278 }
2279
2280 *node = (pm_assoc_node_t) {
2281 .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
2282 .key = key,
2283 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2284 .value = value
2285 };
2286
2287 return node;
2288}
2289
2293static pm_assoc_splat_node_t *
2294pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2295 assert(operator->type == PM_TOKEN_USTAR_STAR);
2296 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2297
2298 *node = (pm_assoc_splat_node_t) {
2299 .base = (
2300 (value == NULL)
2301 ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
2302 : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
2303 ),
2304 .value = value,
2305 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2306 };
2307
2308 return node;
2309}
2310
2314static pm_back_reference_read_node_t *
2315pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2316 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2317 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2318
2319 *node = (pm_back_reference_read_node_t) {
2320 .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
2321 .name = pm_parser_constant_id_token(parser, name)
2322 };
2323
2324 return node;
2325}
2326
2330static pm_begin_node_t *
2331pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2332 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2333
2334 *node = (pm_begin_node_t) {
2335 .base = (
2336 (statements == NULL)
2337 ? PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
2338 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
2339 ),
2340 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2341 .statements = statements,
2342 .end_keyword_loc = { 0 }
2343 };
2344
2345 return node;
2346}
2347
2351static void
2352pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2353 // If the begin keyword doesn't exist, we set the start on the begin_node
2354 if (!node->begin_keyword_loc.start) {
2355 node->base.location.start = rescue_clause->base.location.start;
2356 }
2357 node->base.location.end = rescue_clause->base.location.end;
2358 node->rescue_clause = rescue_clause;
2359}
2360
2364static void
2365pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2366 node->base.location.end = else_clause->base.location.end;
2367 node->else_clause = else_clause;
2368}
2369
2373static void
2374pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2375 node->base.location.end = ensure_clause->base.location.end;
2376 node->ensure_clause = ensure_clause;
2377}
2378
2382static void
2383pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2384 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2385
2386 node->base.location.end = end_keyword->end;
2387 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2388}
2389
2393static pm_block_argument_node_t *
2394pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2395 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2396
2397 *node = (pm_block_argument_node_t) {
2398 .base = (
2399 (expression == NULL)
2400 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
2401 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
2402 ),
2403 .expression = expression,
2404 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2405 };
2406
2407 return node;
2408}
2409
2413static pm_block_node_t *
2414pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2415 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2416
2417 *node = (pm_block_node_t) {
2418 .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
2419 .locals = *locals,
2420 .parameters = parameters,
2421 .body = body,
2422 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2423 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2424 };
2425
2426 return node;
2427}
2428
2432static pm_block_parameter_node_t *
2433pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2434 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2435 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2436
2437 *node = (pm_block_parameter_node_t) {
2438 .base = (
2439 (name->type == PM_TOKEN_NOT_PROVIDED)
2440 ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
2441 : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
2442 ),
2443 .name = pm_parser_optional_constant_id_token(parser, name),
2444 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2445 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2446 };
2447
2448 return node;
2449}
2450
2454static pm_block_parameters_node_t *
2455pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2456 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2457
2458 const uint8_t *start;
2459 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2460 start = opening->start;
2461 } else if (parameters != NULL) {
2462 start = parameters->base.location.start;
2463 } else {
2464 start = NULL;
2465 }
2466
2467 const uint8_t *end;
2468 if (parameters != NULL) {
2469 end = parameters->base.location.end;
2470 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2471 end = opening->end;
2472 } else {
2473 end = NULL;
2474 }
2475
2476 *node = (pm_block_parameters_node_t) {
2477 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
2478 .parameters = parameters,
2479 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2480 .closing_loc = { 0 },
2481 .locals = { 0 }
2482 };
2483
2484 return node;
2485}
2486
2490static void
2491pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2492 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2493
2494 node->base.location.end = closing->end;
2495 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2496}
2497
2501static pm_block_local_variable_node_t *
2502pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2503 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2504
2505 *node = (pm_block_local_variable_node_t) {
2506 .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
2507 .name = pm_parser_constant_id_token(parser, name)
2508 };
2509
2510 return node;
2511}
2512
2516static void
2517pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2518 pm_node_list_append(&node->locals, UP(local));
2519
2520 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2521 node->base.location.end = local->base.location.end;
2522}
2523
2527static pm_break_node_t *
2528pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2529 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2530 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2531
2532 *node = (pm_break_node_t) {
2533 .base = (
2534 (arguments == NULL)
2535 ? PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
2536 : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
2537 ),
2538 .arguments = arguments,
2539 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2540 };
2541
2542 return node;
2543}
2544
2545// There are certain flags that we want to use internally but don't want to
2546// expose because they are not relevant beyond parsing. Therefore we'll define
2547// them here and not define them in config.yml/a header file.
2548static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2549
2550static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2551static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2552static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2553
2559static pm_call_node_t *
2560pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2561 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2562
2563 *node = (pm_call_node_t) {
2564 .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
2565 .receiver = NULL,
2566 .call_operator_loc = { 0 },
2567 .message_loc = { 0 },
2568 .opening_loc = { 0 },
2569 .arguments = NULL,
2570 .closing_loc = { 0 },
2571 .equal_loc = { 0 },
2572 .block = NULL,
2573 .name = 0
2574 };
2575
2576 return node;
2577}
2578
2583static inline pm_node_flags_t
2584pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2585 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2586}
2587
2592static pm_call_node_t *
2593pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2594 pm_assert_value_expression(parser, receiver);
2595
2596 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2597 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2598 flags |= PM_CALL_NODE_FLAGS_INDEX;
2599 }
2600
2601 pm_call_node_t *node = pm_call_node_create(parser, flags);
2602
2603 node->base.location.start = receiver->location.start;
2604 node->base.location.end = pm_arguments_end(arguments);
2605
2606 node->receiver = receiver;
2607 node->message_loc.start = arguments->opening_loc.start;
2608 node->message_loc.end = arguments->closing_loc.end;
2609
2610 node->opening_loc = arguments->opening_loc;
2611 node->arguments = arguments->arguments;
2612 node->closing_loc = arguments->closing_loc;
2613 node->block = arguments->block;
2614
2615 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2616 return node;
2617}
2618
2622static pm_call_node_t *
2623pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2624 pm_assert_value_expression(parser, receiver);
2625 pm_assert_value_expression(parser, argument);
2626
2627 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2628
2629 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2630 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2631
2632 node->receiver = receiver;
2633 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2634
2635 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2636 pm_arguments_node_arguments_append(arguments, argument);
2637 node->arguments = arguments;
2638
2639 node->name = pm_parser_constant_id_token(parser, operator);
2640 return node;
2641}
2642
2643static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2644
2648static pm_call_node_t *
2649pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2650 pm_assert_value_expression(parser, receiver);
2651
2652 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2653
2654 node->base.location.start = receiver->location.start;
2655 const uint8_t *end = pm_arguments_end(arguments);
2656 if (end == NULL) {
2657 end = message->end;
2658 }
2659 node->base.location.end = end;
2660
2661 node->receiver = receiver;
2662 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2663 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2664 node->opening_loc = arguments->opening_loc;
2665 node->arguments = arguments->arguments;
2666 node->closing_loc = arguments->closing_loc;
2667 node->block = arguments->block;
2668
2669 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2670 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2671 }
2672
2677 node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
2678 return node;
2679}
2680
2684static pm_call_node_t *
2685pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2686 pm_call_node_t *node = pm_call_node_create(parser, 0);
2687 node->base.location.start = parser->start;
2688 node->base.location.end = parser->end;
2689
2690 node->receiver = receiver;
2691 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2692 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2693 node->arguments = arguments;
2694
2695 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2696 return node;
2697}
2698
2703static pm_call_node_t *
2704pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2705 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2706
2707 node->base.location.start = message->start;
2708 node->base.location.end = pm_arguments_end(arguments);
2709
2710 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2711 node->opening_loc = arguments->opening_loc;
2712 node->arguments = arguments->arguments;
2713 node->closing_loc = arguments->closing_loc;
2714 node->block = arguments->block;
2715
2716 node->name = pm_parser_constant_id_token(parser, message);
2717 return node;
2718}
2719
2724static pm_call_node_t *
2725pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2726 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2727
2728 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2729 node->arguments = arguments;
2730
2731 node->name = name;
2732 return node;
2733}
2734
2738static pm_call_node_t *
2739pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2740 pm_assert_value_expression(parser, receiver);
2741 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2742
2743 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2744
2745 node->base.location.start = message->start;
2746 if (arguments->closing_loc.start != NULL) {
2747 node->base.location.end = arguments->closing_loc.end;
2748 } else {
2749 assert(receiver != NULL);
2750 node->base.location.end = receiver->location.end;
2751 }
2752
2753 node->receiver = receiver;
2754 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2755 node->opening_loc = arguments->opening_loc;
2756 node->arguments = arguments->arguments;
2757 node->closing_loc = arguments->closing_loc;
2758
2759 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2760 return node;
2761}
2762
2766static pm_call_node_t *
2767pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2768 pm_assert_value_expression(parser, receiver);
2769
2770 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2771
2772 node->base.location.start = receiver->location.start;
2773 node->base.location.end = pm_arguments_end(arguments);
2774
2775 node->receiver = receiver;
2776 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2777 node->opening_loc = arguments->opening_loc;
2778 node->arguments = arguments->arguments;
2779 node->closing_loc = arguments->closing_loc;
2780 node->block = arguments->block;
2781
2782 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2783 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2784 }
2785
2786 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2787 return node;
2788}
2789
2793static pm_call_node_t *
2794pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2795 pm_assert_value_expression(parser, receiver);
2796
2797 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2798
2799 node->base.location.start = operator->start;
2800 node->base.location.end = receiver->location.end;
2801
2802 node->receiver = receiver;
2803 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2804
2805 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2806 return node;
2807}
2808
2813static pm_call_node_t *
2814pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2815 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2816
2817 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2818 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819
2820 node->name = pm_parser_constant_id_token(parser, message);
2821 return node;
2822}
2823
2828static inline bool
2829pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2830 return (
2831 (node->message_loc.start != NULL) &&
2832 (node->message_loc.end[-1] != '!') &&
2833 (node->message_loc.end[-1] != '?') &&
2834 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2835 (node->opening_loc.start == NULL) &&
2836 (node->arguments == NULL) &&
2837 (node->block == NULL)
2838 );
2839}
2840
2844static void
2845pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2846 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2847
2848 if (write_constant->length > 0) {
2849 size_t length = write_constant->length - 1;
2850
2851 void *memory = xmalloc(length);
2852 memcpy(memory, write_constant->start, length);
2853
2854 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2855 } else {
2856 // We can get here if the message was missing because of a syntax error.
2857 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2858 }
2859}
2860
2864static pm_call_and_write_node_t *
2865pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2866 assert(target->block == NULL);
2867 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2868 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2869
2870 *node = (pm_call_and_write_node_t) {
2871 .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
2872 .receiver = target->receiver,
2873 .call_operator_loc = target->call_operator_loc,
2874 .message_loc = target->message_loc,
2875 .read_name = 0,
2876 .write_name = target->name,
2877 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2878 .value = value
2879 };
2880
2881 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2882
2883 // Here we're going to free the target, since it is no longer necessary.
2884 // However, we don't want to call `pm_node_destroy` because we want to keep
2885 // around all of its children since we just reused them.
2886 xfree(target);
2887
2888 return node;
2889}
2890
2895static void
2896pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2897 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2898 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2899 pm_node_t *node;
2900 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2901 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2902 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2903 break;
2904 }
2905 }
2906 }
2907
2908 if (block != NULL) {
2909 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2910 }
2911 }
2912}
2913
2917static pm_index_and_write_node_t *
2918pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2919 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2920 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2921
2922 pm_index_arguments_check(parser, target->arguments, target->block);
2923
2924 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2925 *node = (pm_index_and_write_node_t) {
2926 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
2927 .receiver = target->receiver,
2928 .call_operator_loc = target->call_operator_loc,
2929 .opening_loc = target->opening_loc,
2930 .arguments = target->arguments,
2931 .closing_loc = target->closing_loc,
2932 .block = (pm_block_argument_node_t *) target->block,
2933 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2934 .value = value
2935 };
2936
2937 // Here we're going to free the target, since it is no longer necessary.
2938 // However, we don't want to call `pm_node_destroy` because we want to keep
2939 // around all of its children since we just reused them.
2940 xfree(target);
2941
2942 return node;
2943}
2944
2948static pm_call_operator_write_node_t *
2949pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2950 assert(target->block == NULL);
2951 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
2952
2953 *node = (pm_call_operator_write_node_t) {
2954 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
2955 .receiver = target->receiver,
2956 .call_operator_loc = target->call_operator_loc,
2957 .message_loc = target->message_loc,
2958 .read_name = 0,
2959 .write_name = target->name,
2960 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2961 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2962 .value = value
2963 };
2964
2965 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2966
2967 // Here we're going to free the target, since it is no longer necessary.
2968 // However, we don't want to call `pm_node_destroy` because we want to keep
2969 // around all of its children since we just reused them.
2970 xfree(target);
2971
2972 return node;
2973}
2974
2978static pm_index_operator_write_node_t *
2979pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2980 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
2981
2982 pm_index_arguments_check(parser, target->arguments, target->block);
2983
2984 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2985 *node = (pm_index_operator_write_node_t) {
2986 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
2987 .receiver = target->receiver,
2988 .call_operator_loc = target->call_operator_loc,
2989 .opening_loc = target->opening_loc,
2990 .arguments = target->arguments,
2991 .closing_loc = target->closing_loc,
2992 .block = (pm_block_argument_node_t *) target->block,
2993 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2994 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2995 .value = value
2996 };
2997
2998 // Here we're going to free the target, since it is no longer necessary.
2999 // However, we don't want to call `pm_node_destroy` because we want to keep
3000 // around all of its children since we just reused them.
3001 xfree(target);
3002
3003 return node;
3004}
3005
3009static pm_call_or_write_node_t *
3010pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3012 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3013 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3014
3015 *node = (pm_call_or_write_node_t) {
3016 .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .message_loc = target->message_loc,
3020 .read_name = 0,
3021 .write_name = target->name,
3022 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3023 .value = value
3024 };
3025
3026 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3027
3028 // Here we're going to free the target, since it is no longer necessary.
3029 // However, we don't want to call `pm_node_destroy` because we want to keep
3030 // around all of its children since we just reused them.
3031 xfree(target);
3032
3033 return node;
3034}
3035
3039static pm_index_or_write_node_t *
3040pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3042 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3043
3044 pm_index_arguments_check(parser, target->arguments, target->block);
3045
3046 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3047 *node = (pm_index_or_write_node_t) {
3048 .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
3049 .receiver = target->receiver,
3050 .call_operator_loc = target->call_operator_loc,
3051 .opening_loc = target->opening_loc,
3052 .arguments = target->arguments,
3053 .closing_loc = target->closing_loc,
3054 .block = (pm_block_argument_node_t *) target->block,
3055 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3056 .value = value
3057 };
3058
3059 // Here we're going to free the target, since it is no longer necessary.
3060 // However, we don't want to call `pm_node_destroy` because we want to keep
3061 // around all of its children since we just reused them.
3062 xfree(target);
3063
3064 return node;
3065}
3066
3071static pm_call_target_node_t *
3072pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3073 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3074
3075 *node = (pm_call_target_node_t) {
3076 .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
3077 .receiver = target->receiver,
3078 .call_operator_loc = target->call_operator_loc,
3079 .name = target->name,
3080 .message_loc = target->message_loc
3081 };
3082
3083 /* It is possible to get here where we have parsed an invalid syntax tree
3084 * where the call operator was not present. In that case we will have a
3085 * problem because it is a required location. In this case we need to fill
3086 * it in with a fake location so that the syntax tree remains valid. */
3087 if (node->call_operator_loc.start == NULL) {
3088 node->call_operator_loc = (pm_location_t) {
3089 .start = target->base.location.start,
3090 .end = target->base.location.start
3091 };
3092 }
3093
3094 // Here we're going to free the target, since it is no longer necessary.
3095 // However, we don't want to call `pm_node_destroy` because we want to keep
3096 // around all of its children since we just reused them.
3097 xfree(target);
3098
3099 return node;
3100}
3101
3106static pm_index_target_node_t *
3107pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3108 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3109
3110 pm_index_arguments_check(parser, target->arguments, target->block);
3111 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3112
3113 *node = (pm_index_target_node_t) {
3114 .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
3115 .receiver = target->receiver,
3116 .opening_loc = target->opening_loc,
3117 .arguments = target->arguments,
3118 .closing_loc = target->closing_loc,
3119 .block = (pm_block_argument_node_t *) target->block,
3120 };
3121
3122 // Here we're going to free the target, since it is no longer necessary.
3123 // However, we don't want to call `pm_node_destroy` because we want to keep
3124 // around all of its children since we just reused them.
3125 xfree(target);
3126
3127 return node;
3128}
3129
3133static pm_capture_pattern_node_t *
3134pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3135 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3136
3137 *node = (pm_capture_pattern_node_t) {
3138 .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
3139 .value = value,
3140 .target = target,
3141 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3142 };
3143
3144 return node;
3145}
3146
3150static pm_case_node_t *
3151pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3152 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3153
3154 *node = (pm_case_node_t) {
3155 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
3156 .predicate = predicate,
3157 .else_clause = NULL,
3158 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3159 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3160 .conditions = { 0 }
3161 };
3162
3163 return node;
3164}
3165
3169static void
3170pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3171 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3172
3173 pm_node_list_append(&node->conditions, condition);
3174 node->base.location.end = condition->location.end;
3175}
3176
3180static void
3181pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3182 node->else_clause = else_clause;
3183 node->base.location.end = else_clause->base.location.end;
3184}
3185
3189static void
3190pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3191 node->base.location.end = end_keyword->end;
3192 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3193}
3194
3198static pm_case_match_node_t *
3199pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3200 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3201
3202 *node = (pm_case_match_node_t) {
3203 .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
3204 .predicate = predicate,
3205 .else_clause = NULL,
3206 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3207 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3208 .conditions = { 0 }
3209 };
3210
3211 return node;
3212}
3213
3217static void
3218pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3219 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3220
3221 pm_node_list_append(&node->conditions, condition);
3222 node->base.location.end = condition->location.end;
3223}
3224
3228static void
3229pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3230 node->else_clause = else_clause;
3231 node->base.location.end = else_clause->base.location.end;
3232}
3233
3237static void
3238pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3239 node->base.location.end = end_keyword->end;
3240 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3241}
3242
3246static pm_class_node_t *
3247pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3248 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3249
3250 *node = (pm_class_node_t) {
3251 .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
3252 .locals = *locals,
3253 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3254 .constant_path = constant_path,
3255 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3256 .superclass = superclass,
3257 .body = body,
3258 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3259 .name = pm_parser_constant_id_token(parser, name)
3260 };
3261
3262 return node;
3263}
3264
3268static pm_class_variable_and_write_node_t *
3269pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3270 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3271 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3272
3273 *node = (pm_class_variable_and_write_node_t) {
3274 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
3275 .name = target->name,
3276 .name_loc = target->base.location,
3277 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3278 .value = value
3279 };
3280
3281 return node;
3282}
3283
3287static pm_class_variable_operator_write_node_t *
3288pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3289 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3290
3291 *node = (pm_class_variable_operator_write_node_t) {
3292 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
3293 .name = target->name,
3294 .name_loc = target->base.location,
3295 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3296 .value = value,
3297 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3298 };
3299
3300 return node;
3301}
3302
3306static pm_class_variable_or_write_node_t *
3307pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3309 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3310
3311 *node = (pm_class_variable_or_write_node_t) {
3312 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
3313 .name = target->name,
3314 .name_loc = target->base.location,
3315 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3316 .value = value
3317 };
3318
3319 return node;
3320}
3321
3325static pm_class_variable_read_node_t *
3326pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3327 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3328 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3329
3330 *node = (pm_class_variable_read_node_t) {
3331 .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
3332 .name = pm_parser_constant_id_token(parser, token)
3333 };
3334
3335 return node;
3336}
3337
3344static inline pm_node_flags_t
3345pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3346 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3347 return flags;
3348 }
3349 return 0;
3350}
3351
3355static pm_class_variable_write_node_t *
3356pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3357 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3358 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3359
3360 *node = (pm_class_variable_write_node_t) {
3361 .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
3362 .name = read_node->name,
3363 .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
3364 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3365 .value = value
3366 };
3367
3368 return node;
3369}
3370
3374static pm_constant_path_and_write_node_t *
3375pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3376 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3377 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3378
3379 *node = (pm_constant_path_and_write_node_t) {
3380 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
3381 .target = target,
3382 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3383 .value = value
3384 };
3385
3386 return node;
3387}
3388
3392static pm_constant_path_operator_write_node_t *
3393pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3394 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3395
3396 *node = (pm_constant_path_operator_write_node_t) {
3397 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
3398 .target = target,
3399 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3400 .value = value,
3401 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3402 };
3403
3404 return node;
3405}
3406
3410static pm_constant_path_or_write_node_t *
3411pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3412 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3413 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3414
3415 *node = (pm_constant_path_or_write_node_t) {
3416 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
3417 .target = target,
3418 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3419 .value = value
3420 };
3421
3422 return node;
3423}
3424
3428static pm_constant_path_node_t *
3429pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3430 pm_assert_value_expression(parser, parent);
3431 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3432
3433 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3434 if (name_token->type == PM_TOKEN_CONSTANT) {
3435 name = pm_parser_constant_id_token(parser, name_token);
3436 }
3437
3438 if (parent == NULL) {
3439 *node = (pm_constant_path_node_t) {
3440 .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token),
3441 .parent = parent,
3442 .name = name,
3443 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3444 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3445 };
3446 } else {
3447 *node = (pm_constant_path_node_t) {
3448 .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token),
3449 .parent = parent,
3450 .name = name,
3451 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3452 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3453 };
3454 }
3455
3456 return node;
3457}
3458
3462static pm_constant_path_write_node_t *
3463pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3464 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3465 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3466
3467 *node = (pm_constant_path_write_node_t) {
3468 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value),
3469 .target = target,
3470 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3471 .value = value
3472 };
3473
3474 return node;
3475}
3476
3480static pm_constant_and_write_node_t *
3481pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3482 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3483 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3484
3485 *node = (pm_constant_and_write_node_t) {
3486 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
3487 .name = target->name,
3488 .name_loc = target->base.location,
3489 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3490 .value = value
3491 };
3492
3493 return node;
3494}
3495
3499static pm_constant_operator_write_node_t *
3500pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3501 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3502
3503 *node = (pm_constant_operator_write_node_t) {
3504 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
3505 .name = target->name,
3506 .name_loc = target->base.location,
3507 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3508 .value = value,
3509 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3510 };
3511
3512 return node;
3513}
3514
3518static pm_constant_or_write_node_t *
3519pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3520 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3521 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3522
3523 *node = (pm_constant_or_write_node_t) {
3524 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
3525 .name = target->name,
3526 .name_loc = target->base.location,
3527 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3528 .value = value
3529 };
3530
3531 return node;
3532}
3533
3537static pm_constant_read_node_t *
3538pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3539 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3540 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3541
3542 *node = (pm_constant_read_node_t) {
3543 .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
3544 .name = pm_parser_constant_id_token(parser, name)
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_write_node_t *
3554pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3556 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3557
3558 *node = (pm_constant_write_node_t) {
3559 .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value),
3560 .name = target->name,
3561 .name_loc = target->base.location,
3562 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3563 .value = value
3564 };
3565
3566 return node;
3567}
3568
3572static void
3573pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3574 switch (PM_NODE_TYPE(node)) {
3575 case PM_BEGIN_NODE: {
3576 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3577 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3578 break;
3579 }
3580 case PM_PARENTHESES_NODE: {
3581 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3582 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3583 break;
3584 }
3585 case PM_STATEMENTS_NODE: {
3586 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3587 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3588 break;
3589 }
3590 case PM_ARRAY_NODE:
3591 case PM_FLOAT_NODE:
3592 case PM_IMAGINARY_NODE:
3593 case PM_INTEGER_NODE:
3594 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3595 case PM_INTERPOLATED_STRING_NODE:
3596 case PM_INTERPOLATED_SYMBOL_NODE:
3597 case PM_INTERPOLATED_X_STRING_NODE:
3598 case PM_RATIONAL_NODE:
3599 case PM_REGULAR_EXPRESSION_NODE:
3600 case PM_SOURCE_ENCODING_NODE:
3601 case PM_SOURCE_FILE_NODE:
3602 case PM_SOURCE_LINE_NODE:
3603 case PM_STRING_NODE:
3604 case PM_SYMBOL_NODE:
3605 case PM_X_STRING_NODE:
3606 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3607 break;
3608 default:
3609 break;
3610 }
3611}
3612
3616static pm_def_node_t *
3617pm_def_node_create(
3618 pm_parser_t *parser,
3619 pm_constant_id_t name,
3620 const pm_token_t *name_loc,
3621 pm_node_t *receiver,
3622 pm_parameters_node_t *parameters,
3623 pm_node_t *body,
3624 pm_constant_id_list_t *locals,
3625 const pm_token_t *def_keyword,
3626 const pm_token_t *operator,
3627 const pm_token_t *lparen,
3628 const pm_token_t *rparen,
3629 const pm_token_t *equal,
3630 const pm_token_t *end_keyword
3631) {
3632 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3633
3634 if (receiver != NULL) {
3635 pm_def_node_receiver_check(parser, receiver);
3636 }
3637
3638 *node = (pm_def_node_t) {
3639 .base = (
3640 (end_keyword->type == PM_TOKEN_NOT_PROVIDED)
3641 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
3642 : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
3643 ),
3644 .name = name,
3645 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3646 .receiver = receiver,
3647 .parameters = parameters,
3648 .body = body,
3649 .locals = *locals,
3650 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3651 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3652 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3653 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3654 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3655 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3656 };
3657
3658 return node;
3659}
3660
3664static pm_defined_node_t *
3665pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3666 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3667
3668 *node = (pm_defined_node_t) {
3669 .base = (
3670 (rparen->type == PM_TOKEN_NOT_PROVIDED)
3671 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
3672 : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
3673 ),
3674 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3675 .value = value,
3676 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3677 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
3678 };
3679
3680 return node;
3681}
3682
3686static pm_else_node_t *
3687pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3688 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3689
3690 *node = (pm_else_node_t) {
3691 .base = (
3692 ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL))
3693 ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
3694 : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
3695 ),
3696 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3697 .statements = statements,
3698 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3699 };
3700
3701 return node;
3702}
3703
3707static pm_embedded_statements_node_t *
3708pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3709 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3710
3711 *node = (pm_embedded_statements_node_t) {
3712 .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
3713 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3714 .statements = statements,
3715 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3716 };
3717
3718 return node;
3719}
3720
3724static pm_embedded_variable_node_t *
3725pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3726 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3727
3728 *node = (pm_embedded_variable_node_t) {
3729 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
3730 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3731 .variable = variable
3732 };
3733
3734 return node;
3735}
3736
3740static pm_ensure_node_t *
3741pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3742 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
3743
3744 *node = (pm_ensure_node_t) {
3745 .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
3746 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
3747 .statements = statements,
3748 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
3749 };
3750
3751 return node;
3752}
3753
3757static pm_false_node_t *
3758pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3759 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3760 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
3761
3762 *node = (pm_false_node_t) {
3763 .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
3764 };
3765
3766 return node;
3767}
3768
3773static pm_find_pattern_node_t *
3774pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3775 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
3776
3777 pm_node_t *left = nodes->nodes[0];
3778 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3779 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3780
3781 pm_node_t *right;
3782
3783 if (nodes->size == 1) {
3784 right = UP(pm_missing_node_create(parser, left->location.end, left->location.end));
3785 } else {
3786 right = nodes->nodes[nodes->size - 1];
3787 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3788 }
3789
3790#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3791 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3792 // The resulting AST will anyway be ignored, but this file still needs to compile.
3793 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3794#else
3795 pm_node_t *right_splat_node = right;
3796#endif
3797 *node = (pm_find_pattern_node_t) {
3798 .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right),
3799 .constant = NULL,
3800 .left = left_splat_node,
3801 .right = right_splat_node,
3802 .requireds = { 0 },
3803 .opening_loc = { 0 },
3804 .closing_loc = { 0 }
3805 };
3806
3807 // For now we're going to just copy over each pointer manually. This could be
3808 // much more efficient, as we could instead resize the node list to only point
3809 // to 1...-1.
3810 for (size_t index = 1; index < nodes->size - 1; index++) {
3811 pm_node_list_append(&node->requireds, nodes->nodes[index]);
3812 }
3813
3814 return node;
3815}
3816
3821static double
3822pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3823 ptrdiff_t diff = token->end - token->start;
3824 if (diff <= 0) return 0.0;
3825
3826 // First, get a buffer of the content.
3827 size_t length = (size_t) diff;
3828 char *buffer = xmalloc(sizeof(char) * (length + 1));
3829 memcpy((void *) buffer, token->start, length);
3830
3831 // Next, determine if we need to replace the decimal point because of
3832 // locale-specific options, and then normalize them if we have to.
3833 char decimal_point = *localeconv()->decimal_point;
3834 if (decimal_point != '.') {
3835 for (size_t index = 0; index < length; index++) {
3836 if (buffer[index] == '.') buffer[index] = decimal_point;
3837 }
3838 }
3839
3840 // Next, handle underscores by removing them from the buffer.
3841 for (size_t index = 0; index < length; index++) {
3842 if (buffer[index] == '_') {
3843 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3844 length--;
3845 }
3846 }
3847
3848 // Null-terminate the buffer so that strtod cannot read off the end.
3849 buffer[length] = '\0';
3850
3851 // Now, call strtod to parse the value. Note that CRuby has their own
3852 // version of strtod which avoids locales. We're okay using the locale-aware
3853 // version because we've already validated through the parser that the token
3854 // is in a valid format.
3855 errno = 0;
3856 char *eptr;
3857 double value = strtod(buffer, &eptr);
3858
3859 // This should never happen, because we've already checked that the token
3860 // is in a valid format. However it's good to be safe.
3861 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3862 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
3863 xfree((void *) buffer);
3864 return 0.0;
3865 }
3866
3867 // If errno is set, then it should only be ERANGE. At this point we need to
3868 // check if it's infinity (it should be).
3869 if (errno == ERANGE && PRISM_ISINF(value)) {
3870 int warn_width;
3871 const char *ellipsis;
3872
3873 if (length > 20) {
3874 warn_width = 20;
3875 ellipsis = "...";
3876 } else {
3877 warn_width = (int) length;
3878 ellipsis = "";
3879 }
3880
3881 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3882 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3883 }
3884
3885 // Finally we can free the buffer and return the value.
3886 xfree((void *) buffer);
3887 return value;
3888}
3889
3893static pm_float_node_t *
3894pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3895 assert(token->type == PM_TOKEN_FLOAT);
3896 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
3897
3898 *node = (pm_float_node_t) {
3899 .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3900 .value = pm_double_parse(parser, token)
3901 };
3902
3903 return node;
3904}
3905
3909static pm_imaginary_node_t *
3910pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3911 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3912
3913 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3914 *node = (pm_imaginary_node_t) {
3915 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3916 .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
3917 .type = PM_TOKEN_FLOAT,
3918 .start = token->start,
3919 .end = token->end - 1
3920 })))
3921 };
3922
3923 return node;
3924}
3925
3929static pm_rational_node_t *
3930pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3931 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3932
3933 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
3934 *node = (pm_rational_node_t) {
3935 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
3936 .numerator = { 0 },
3937 .denominator = { 0 }
3938 };
3939
3940 const uint8_t *start = token->start;
3941 const uint8_t *end = token->end - 1; // r
3942
3943 while (start < end && *start == '0') start++; // 0.1 -> .1
3944 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3945
3946 size_t length = (size_t) (end - start);
3947 if (length == 1) {
3948 node->denominator.value = 1;
3949 return node;
3950 }
3951
3952 const uint8_t *point = memchr(start, '.', length);
3953 assert(point && "should have a decimal point");
3954
3955 uint8_t *digits = xmalloc(length);
3956 if (digits == NULL) {
3957 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3958 abort();
3959 }
3960
3961 memcpy(digits, start, (unsigned long) (point - start));
3962 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3963 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3964
3965 digits[0] = '1';
3966 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
3967 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
3968 xfree(digits);
3969
3970 pm_integers_reduce(&node->numerator, &node->denominator);
3971 return node;
3972}
3973
3978static pm_imaginary_node_t *
3979pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3980 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
3981
3982 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3983 *node = (pm_imaginary_node_t) {
3984 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
3985 .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
3986 .type = PM_TOKEN_FLOAT_RATIONAL,
3987 .start = token->start,
3988 .end = token->end - 1
3989 })))
3990 };
3991
3992 return node;
3993}
3994
3998static pm_for_node_t *
3999pm_for_node_create(
4000 pm_parser_t *parser,
4001 pm_node_t *index,
4002 pm_node_t *collection,
4003 pm_statements_node_t *statements,
4004 const pm_token_t *for_keyword,
4005 const pm_token_t *in_keyword,
4006 const pm_token_t *do_keyword,
4007 const pm_token_t *end_keyword
4008) {
4009 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4010
4011 *node = (pm_for_node_t) {
4012 .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword),
4013 .index = index,
4014 .collection = collection,
4015 .statements = statements,
4016 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4017 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4018 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4019 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4020 };
4021
4022 return node;
4023}
4024
4028static pm_forwarding_arguments_node_t *
4029pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4030 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4031 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4032
4033 *node = (pm_forwarding_arguments_node_t) {
4034 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
4035 };
4036
4037 return node;
4038}
4039
4043static pm_forwarding_parameter_node_t *
4044pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4045 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4046 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4047
4048 *node = (pm_forwarding_parameter_node_t) {
4049 .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
4050 };
4051
4052 return node;
4053}
4054
4058static pm_forwarding_super_node_t *
4059pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4060 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4061 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4062 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4063
4064 pm_block_node_t *block = NULL;
4065 if (arguments->block != NULL) {
4066 block = (pm_block_node_t *) arguments->block;
4067 }
4068
4069 *node = (pm_forwarding_super_node_t) {
4070 .base = (
4071 (block == NULL)
4072 ? PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token)
4073 : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block)
4074 ),
4075 .block = block
4076 };
4077
4078 return node;
4079}
4080
4085static pm_hash_pattern_node_t *
4086pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4087 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4088
4089 *node = (pm_hash_pattern_node_t) {
4090 .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
4091 .constant = NULL,
4092 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4093 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4094 .elements = { 0 },
4095 .rest = NULL
4096 };
4097
4098 return node;
4099}
4100
4104static pm_hash_pattern_node_t *
4105pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4106 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4107
4108 const uint8_t *start;
4109 const uint8_t *end;
4110
4111 if (elements->size > 0) {
4112 if (rest) {
4113 start = MIN(rest->location.start, elements->nodes[0]->location.start);
4114 end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end);
4115 } else {
4116 start = elements->nodes[0]->location.start;
4117 end = elements->nodes[elements->size - 1]->location.end;
4118 }
4119 } else {
4120 assert(rest != NULL);
4121 start = rest->location.start;
4122 end = rest->location.end;
4123 }
4124
4125 *node = (pm_hash_pattern_node_t) {
4126 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
4127 .constant = NULL,
4128 .elements = { 0 },
4129 .rest = rest,
4130 .opening_loc = { 0 },
4131 .closing_loc = { 0 }
4132 };
4133
4134 pm_node_list_concat(&node->elements, elements);
4135 return node;
4136}
4137
4141static pm_constant_id_t
4142pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4143 switch (PM_NODE_TYPE(target)) {
4144 case PM_GLOBAL_VARIABLE_READ_NODE:
4145 return ((pm_global_variable_read_node_t *) target)->name;
4146 case PM_BACK_REFERENCE_READ_NODE:
4147 return ((pm_back_reference_read_node_t *) target)->name;
4148 case PM_NUMBERED_REFERENCE_READ_NODE:
4149 // This will only ever happen in the event of a syntax error, but we
4150 // still need to provide something for the node.
4151 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4152 default:
4153 assert(false && "unreachable");
4154 return (pm_constant_id_t) -1;
4155 }
4156}
4157
4161static pm_global_variable_and_write_node_t *
4162pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4163 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4164 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4165
4166 *node = (pm_global_variable_and_write_node_t) {
4167 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
4168 .name = pm_global_variable_write_name(parser, target),
4169 .name_loc = target->location,
4170 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4171 .value = value
4172 };
4173
4174 return node;
4175}
4176
4180static pm_global_variable_operator_write_node_t *
4181pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4182 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4183
4184 *node = (pm_global_variable_operator_write_node_t) {
4185 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4186 .name = pm_global_variable_write_name(parser, target),
4187 .name_loc = target->location,
4188 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4189 .value = value,
4190 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4191 };
4192
4193 return node;
4194}
4195
4199static pm_global_variable_or_write_node_t *
4200pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4201 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4202 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4203
4204 *node = (pm_global_variable_or_write_node_t) {
4205 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
4206 .name = pm_global_variable_write_name(parser, target),
4207 .name_loc = target->location,
4208 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4209 .value = value
4210 };
4211
4212 return node;
4213}
4214
4218static pm_global_variable_read_node_t *
4219pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4220 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4221
4222 *node = (pm_global_variable_read_node_t) {
4223 .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
4224 .name = pm_parser_constant_id_token(parser, name)
4225 };
4226
4227 return node;
4228}
4229
4233static pm_global_variable_read_node_t *
4234pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4235 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4236
4237 *node = (pm_global_variable_read_node_t) {
4238 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
4239 .name = name
4240 };
4241
4242 return node;
4243}
4244
4248static pm_global_variable_write_node_t *
4249pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4250 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4251 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4252
4253 *node = (pm_global_variable_write_node_t) {
4254 .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
4255 .name = pm_global_variable_write_name(parser, target),
4256 .name_loc = PM_LOCATION_NODE_VALUE(target),
4257 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4258 .value = value
4259 };
4260
4261 return node;
4262}
4263
4267static pm_global_variable_write_node_t *
4268pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4269 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4270
4271 *node = (pm_global_variable_write_node_t) {
4272 .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
4273 .name = name,
4274 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4275 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4276 .value = value
4277 };
4278
4279 return node;
4280}
4281
4285static pm_hash_node_t *
4286pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4287 assert(opening != NULL);
4288 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4289
4290 *node = (pm_hash_node_t) {
4291 .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4292 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4293 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4294 .elements = { 0 }
4295 };
4296
4297 return node;
4298}
4299
4303static inline void
4304pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4305 pm_node_list_append(&hash->elements, element);
4306
4307 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4308 if (static_literal) {
4309 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4310 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4311 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4312 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4313 }
4314
4315 if (!static_literal) {
4316 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4317 }
4318}
4319
4320static inline void
4321pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4322 hash->base.location.end = token->end;
4323 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4324}
4325
4329static pm_if_node_t *
4330pm_if_node_create(pm_parser_t *parser,
4331 const pm_token_t *if_keyword,
4332 pm_node_t *predicate,
4333 const pm_token_t *then_keyword,
4334 pm_statements_node_t *statements,
4335 pm_node_t *subsequent,
4336 const pm_token_t *end_keyword
4337) {
4338 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4339 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4340
4341 const uint8_t *end;
4342 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4343 end = end_keyword->end;
4344 } else if (subsequent != NULL) {
4345 end = subsequent->location.end;
4346 } else if (pm_statements_node_body_length(statements) != 0) {
4347 end = statements->base.location.end;
4348 } else {
4349 end = predicate->location.end;
4350 }
4351
4352 *node = (pm_if_node_t) {
4353 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
4354 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4355 .predicate = predicate,
4356 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4357 .statements = statements,
4358 .subsequent = subsequent,
4359 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4360 };
4361
4362 return node;
4363}
4364
4368static pm_if_node_t *
4369pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4370 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4371 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4372
4373 pm_statements_node_t *statements = pm_statements_node_create(parser);
4374 pm_statements_node_body_append(parser, statements, statement, true);
4375
4376 *node = (pm_if_node_t) {
4377 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
4378 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4379 .predicate = predicate,
4380 .then_keyword_loc = { 0 },
4381 .statements = statements,
4382 .subsequent = NULL,
4383 .end_keyword_loc = { 0 }
4384 };
4385
4386 return node;
4387}
4388
4392static pm_if_node_t *
4393pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4394 pm_assert_value_expression(parser, predicate);
4395 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4396
4397 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4398 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4399
4400 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4401 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4402
4403 pm_token_t end_keyword = not_provided(parser);
4404 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4405
4406 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4407
4408 *node = (pm_if_node_t) {
4409 .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
4410 .if_keyword_loc = { 0 },
4411 .predicate = predicate,
4412 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4413 .statements = if_statements,
4414 .subsequent = UP(else_node),
4415 .end_keyword_loc = { 0 }
4416 };
4417
4418 return node;
4419
4420}
4421
4422static inline void
4423pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4424 node->base.location.end = keyword->end;
4425 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4426}
4427
4428static inline void
4429pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4430 node->base.location.end = keyword->end;
4431 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4432}
4433
4437static pm_implicit_node_t *
4438pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4439 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4440
4441 *node = (pm_implicit_node_t) {
4442 .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
4443 .value = value
4444 };
4445
4446 return node;
4447}
4448
4452static pm_implicit_rest_node_t *
4453pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4454 assert(token->type == PM_TOKEN_COMMA);
4455
4456 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4457
4458 *node = (pm_implicit_rest_node_t) {
4459 .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
4460 };
4461
4462 return node;
4463}
4464
4468static pm_integer_node_t *
4469pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4470 assert(token->type == PM_TOKEN_INTEGER);
4471 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4472
4473 *node = (pm_integer_node_t) {
4474 .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4475 .value = { 0 }
4476 };
4477
4478 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4479 switch (base) {
4480 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4481 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4482 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4483 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4484 default: assert(false && "unreachable"); break;
4485 }
4486
4487 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4488 return node;
4489}
4490
4495static pm_imaginary_node_t *
4496pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4497 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4498
4499 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4500 *node = (pm_imaginary_node_t) {
4501 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4502 .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4503 .type = PM_TOKEN_INTEGER,
4504 .start = token->start,
4505 .end = token->end - 1
4506 })))
4507 };
4508
4509 return node;
4510}
4511
4516static pm_rational_node_t *
4517pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4518 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4519
4520 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4521 *node = (pm_rational_node_t) {
4522 .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
4523 .numerator = { 0 },
4524 .denominator = { .value = 1, 0 }
4525 };
4526
4527 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4528 switch (base) {
4529 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4530 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4531 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4532 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4533 default: assert(false && "unreachable"); break;
4534 }
4535
4536 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4537
4538 return node;
4539}
4540
4545static pm_imaginary_node_t *
4546pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4547 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4548
4549 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4550 *node = (pm_imaginary_node_t) {
4551 .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
4552 .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4553 .type = PM_TOKEN_INTEGER_RATIONAL,
4554 .start = token->start,
4555 .end = token->end - 1
4556 })))
4557 };
4558
4559 return node;
4560}
4561
4565static pm_in_node_t *
4566pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4567 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4568
4569 const uint8_t *end;
4570 if (statements != NULL) {
4571 end = statements->base.location.end;
4572 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4573 end = then_keyword->end;
4574 } else {
4575 end = pattern->location.end;
4576 }
4577
4578 *node = (pm_in_node_t) {
4579 .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
4580 .pattern = pattern,
4581 .statements = statements,
4582 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4583 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
4584 };
4585
4586 return node;
4587}
4588
4592static pm_instance_variable_and_write_node_t *
4593pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4594 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4595 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
4596
4597 *node = (pm_instance_variable_and_write_node_t) {
4598 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
4599 .name = target->name,
4600 .name_loc = target->base.location,
4601 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4602 .value = value
4603 };
4604
4605 return node;
4606}
4607
4611static pm_instance_variable_operator_write_node_t *
4612pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4613 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
4614
4615 *node = (pm_instance_variable_operator_write_node_t) {
4616 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
4617 .name = target->name,
4618 .name_loc = target->base.location,
4619 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4620 .value = value,
4621 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4622 };
4623
4624 return node;
4625}
4626
4630static pm_instance_variable_or_write_node_t *
4631pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4632 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4633 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
4634
4635 *node = (pm_instance_variable_or_write_node_t) {
4636 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
4637 .name = target->name,
4638 .name_loc = target->base.location,
4639 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4640 .value = value
4641 };
4642
4643 return node;
4644}
4645
4649static pm_instance_variable_read_node_t *
4650pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4651 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4652 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
4653
4654 *node = (pm_instance_variable_read_node_t) {
4655 .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
4656 .name = pm_parser_constant_id_token(parser, token)
4657 };
4658
4659 return node;
4660}
4661
4666static pm_instance_variable_write_node_t *
4667pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4668 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
4669 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4670
4671 *node = (pm_instance_variable_write_node_t) {
4672 .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
4673 .name = read_node->name,
4674 .name_loc = PM_LOCATION_NODE_VALUE(read_node),
4675 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4676 .value = value
4677 };
4678
4679 return node;
4680}
4681
4687static void
4688pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4689 switch (PM_NODE_TYPE(part)) {
4690 case PM_STRING_NODE:
4691 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4692 break;
4693 case PM_EMBEDDED_STATEMENTS_NODE: {
4694 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4695 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4696
4697 if (embedded == NULL) {
4698 // If there are no statements or more than one statement, then
4699 // we lose the static literal flag.
4700 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4701 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4702 // If the embedded statement is a string, then we can keep the
4703 // static literal flag and mark the string as frozen.
4704 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4705 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4706 // If the embedded statement is an interpolated string and it's
4707 // a static literal, then we can keep the static literal flag.
4708 } else {
4709 // Otherwise we lose the static literal flag.
4710 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4711 }
4712
4713 break;
4714 }
4715 case PM_EMBEDDED_VARIABLE_NODE:
4716 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4717 break;
4718 default:
4719 assert(false && "unexpected node type");
4720 break;
4721 }
4722
4723 pm_node_list_append(parts, part);
4724}
4725
4729static pm_interpolated_regular_expression_node_t *
4730pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4731 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
4732
4733 *node = (pm_interpolated_regular_expression_node_t) {
4734 .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
4735 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4736 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
4737 .parts = { 0 }
4738 };
4739
4740 return node;
4741}
4742
4743static inline void
4744pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4745 if (node->base.location.start > part->location.start) {
4746 node->base.location.start = part->location.start;
4747 }
4748 if (node->base.location.end < part->location.end) {
4749 node->base.location.end = part->location.end;
4750 }
4751
4752 pm_interpolated_node_append(UP(node), &node->parts, part);
4753}
4754
4755static inline void
4756pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4757 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4758 node->base.location.end = closing->end;
4759 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4760}
4761
4785static inline void
4786pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
4787#define CLEAR_FLAGS(node) \
4788 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4789
4790#define MUTABLE_FLAGS(node) \
4791 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4792
4793 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4794 node->base.location.start = part->location.start;
4795 }
4796
4797 node->base.location.end = MAX(node->base.location.end, part->location.end);
4798
4799 switch (PM_NODE_TYPE(part)) {
4800 case PM_STRING_NODE:
4801 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4802 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4803 // as long as this interpolation only consists of other string literals.
4804 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4805 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4806 }
4807 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4808 break;
4809 case PM_INTERPOLATED_STRING_NODE:
4810 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4811 // If the string that we're concatenating is a static literal,
4812 // then we can keep the static literal flag for this string.
4813 } else {
4814 // Otherwise, we lose the static literal flag here and we should
4815 // also clear the mutability flags.
4816 CLEAR_FLAGS(node);
4817 }
4818 break;
4819 case PM_EMBEDDED_STATEMENTS_NODE: {
4820 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4821 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4822
4823 if (embedded == NULL) {
4824 // If we're embedding multiple statements or no statements, then
4825 // the string is not longer a static literal.
4826 CLEAR_FLAGS(node);
4827 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4828 // If the embedded statement is a string, then we can make that
4829 // string as frozen and static literal, and not touch the static
4830 // literal status of this string.
4831 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4832
4833 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4834 MUTABLE_FLAGS(node);
4835 }
4836 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4837 // If the embedded statement is an interpolated string, but that
4838 // string is marked as static literal, then we can keep our
4839 // static literal status for this string.
4840 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4841 MUTABLE_FLAGS(node);
4842 }
4843 } else {
4844 // In all other cases, we lose the static literal flag here and
4845 // become mutable.
4846 CLEAR_FLAGS(node);
4847 }
4848
4849 break;
4850 }
4851 case PM_EMBEDDED_VARIABLE_NODE:
4852 // Embedded variables clear static literal, which means we also
4853 // should clear the mutability flags.
4854 CLEAR_FLAGS(node);
4855 break;
4856 case PM_X_STRING_NODE:
4857 case PM_INTERPOLATED_X_STRING_NODE:
4858 case PM_SYMBOL_NODE:
4859 case PM_INTERPOLATED_SYMBOL_NODE:
4860 // These will only happen in error cases. But we want to handle it
4861 // here so that we don't fail the assertion.
4862 CLEAR_FLAGS(node);
4863 break;
4864 default:
4865 assert(false && "unexpected node type");
4866 break;
4867 }
4868
4869 pm_node_list_append(&node->parts, part);
4870
4871#undef CLEAR_FLAGS
4872#undef MUTABLE_FLAGS
4873}
4874
4878static pm_interpolated_string_node_t *
4879pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4880 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
4881 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4882
4883 switch (parser->frozen_string_literal) {
4884 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4885 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4886 break;
4887 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4888 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4889 break;
4890 }
4891
4892 *node = (pm_interpolated_string_node_t) {
4893 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
4894 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4895 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4896 .parts = { 0 }
4897 };
4898
4899 if (parts != NULL) {
4900 pm_node_t *part;
4901 PM_NODE_LIST_FOREACH(parts, index, part) {
4902 pm_interpolated_string_node_append(node, part);
4903 }
4904 }
4905
4906 return node;
4907}
4908
4912static void
4913pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4914 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4915 node->base.location.end = closing->end;
4916}
4917
4918static void
4919pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4920 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4921 node->base.location.start = part->location.start;
4922 }
4923
4924 pm_interpolated_node_append(UP(node), &node->parts, part);
4925 node->base.location.end = MAX(node->base.location.end, part->location.end);
4926}
4927
4928static void
4929pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4930 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4931 node->base.location.end = closing->end;
4932}
4933
4937static pm_interpolated_symbol_node_t *
4938pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4939 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
4940
4941 *node = (pm_interpolated_symbol_node_t) {
4942 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
4943 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4944 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4945 .parts = { 0 }
4946 };
4947
4948 if (parts != NULL) {
4949 pm_node_t *part;
4950 PM_NODE_LIST_FOREACH(parts, index, part) {
4951 pm_interpolated_symbol_node_append(node, part);
4952 }
4953 }
4954
4955 return node;
4956}
4957
4961static pm_interpolated_x_string_node_t *
4962pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4963 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
4964
4965 *node = (pm_interpolated_x_string_node_t) {
4966 .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
4967 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
4968 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4969 .parts = { 0 }
4970 };
4971
4972 return node;
4973}
4974
4975static inline void
4976pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
4977 pm_interpolated_node_append(UP(node), &node->parts, part);
4978 node->base.location.end = part->location.end;
4979}
4980
4981static inline void
4982pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
4983 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
4984 node->base.location.end = closing->end;
4985}
4986
4990static pm_it_local_variable_read_node_t *
4991pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4992 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
4993
4994 *node = (pm_it_local_variable_read_node_t) {
4995 .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name),
4996 };
4997
4998 return node;
4999}
5000
5004static pm_it_parameters_node_t *
5005pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5006 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5007
5008 *node = (pm_it_parameters_node_t) {
5009 .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing),
5010 };
5011
5012 return node;
5013}
5014
5018static pm_keyword_hash_node_t *
5019pm_keyword_hash_node_create(pm_parser_t *parser) {
5020 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5021
5022 *node = (pm_keyword_hash_node_t) {
5023 .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
5024 .elements = { 0 }
5025 };
5026
5027 return node;
5028}
5029
5033static void
5034pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5035 // If the element being added is not an AssocNode or does not have a symbol
5036 // key, then we want to turn the SYMBOL_KEYS flag off.
5037 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5038 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5039 }
5040
5041 pm_node_list_append(&hash->elements, element);
5042 if (hash->base.location.start == NULL) {
5043 hash->base.location.start = element->location.start;
5044 }
5045 hash->base.location.end = element->location.end;
5046}
5047
5051static pm_required_keyword_parameter_node_t *
5052pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5053 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5054
5055 *node = (pm_required_keyword_parameter_node_t) {
5056 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
5057 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5058 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5059 };
5060
5061 return node;
5062}
5063
5067static pm_optional_keyword_parameter_node_t *
5068pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5069 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5070
5071 *node = (pm_optional_keyword_parameter_node_t) {
5072 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
5073 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5074 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5075 .value = value
5076 };
5077
5078 return node;
5079}
5080
5084static pm_keyword_rest_parameter_node_t *
5085pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5086 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5087
5088 *node = (pm_keyword_rest_parameter_node_t) {
5089 .base = (
5090 (name->type == PM_TOKEN_NOT_PROVIDED)
5091 ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
5092 : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
5093 ),
5094 .name = pm_parser_optional_constant_id_token(parser, name),
5095 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5096 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5097 };
5098
5099 return node;
5100}
5101
5105static pm_lambda_node_t *
5106pm_lambda_node_create(
5107 pm_parser_t *parser,
5108 pm_constant_id_list_t *locals,
5109 const pm_token_t *operator,
5110 const pm_token_t *opening,
5111 const pm_token_t *closing,
5112 pm_node_t *parameters,
5113 pm_node_t *body
5114) {
5115 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5116
5117 *node = (pm_lambda_node_t) {
5118 .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
5119 .locals = *locals,
5120 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5121 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5122 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5123 .parameters = parameters,
5124 .body = body
5125 };
5126
5127 return node;
5128}
5129
5133static pm_local_variable_and_write_node_t *
5134pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5135 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5136 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5137 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5138
5139 *node = (pm_local_variable_and_write_node_t) {
5140 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
5141 .name_loc = target->location,
5142 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5143 .value = value,
5144 .name = name,
5145 .depth = depth
5146 };
5147
5148 return node;
5149}
5150
5154static pm_local_variable_operator_write_node_t *
5155pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5156 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5157
5158 *node = (pm_local_variable_operator_write_node_t) {
5159 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
5160 .name_loc = target->location,
5161 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5162 .value = value,
5163 .name = name,
5164 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5165 .depth = depth
5166 };
5167
5168 return node;
5169}
5170
5174static pm_local_variable_or_write_node_t *
5175pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5176 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5177 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5178 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5179
5180 *node = (pm_local_variable_or_write_node_t) {
5181 .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
5182 .name_loc = target->location,
5183 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5184 .value = value,
5185 .name = name,
5186 .depth = depth
5187 };
5188
5189 return node;
5190}
5191
5195static pm_local_variable_read_node_t *
5196pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5197 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5198
5199 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5200
5201 *node = (pm_local_variable_read_node_t) {
5202 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
5203 .name = name_id,
5204 .depth = depth
5205 };
5206
5207 return node;
5208}
5209
5213static pm_local_variable_read_node_t *
5214pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5215 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5216 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5217}
5218
5223static pm_local_variable_read_node_t *
5224pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5225 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5226 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5227}
5228
5232static pm_local_variable_write_node_t *
5233pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5234 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5235 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5236
5237 *node = (pm_local_variable_write_node_t) {
5238 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
5239 .name = name,
5240 .depth = depth,
5241 .value = value,
5242 .name_loc = *name_loc,
5243 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5244 };
5245
5246 return node;
5247}
5248
/**
 * Returns true when the byte range [start, end) is exactly the two bytes
 * "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so no byte is read from a range of another size.
    if (end - start != 2) {
        return false;
    }

    if (start[0] != 'i') {
        return false;
    }

    return start[1] == 't';
}
5256
/**
 * Returns true when the byte range [start, end) is two bytes long, begins
 * with '_', and its second byte is a decimal digit other than '0'.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    // Check the length first so no byte is read from a range of another size.
    if (end - start != 2) {
        return false;
    }

    if (start[0] != '_') {
        return false;
    }

    return (start[1] != '0') && pm_char_is_decimal_digit(start[1]);
}
5265
5270static inline void
5271pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5272 if (pm_token_is_numbered_parameter(start, end)) {
5273 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5274 }
5275}
5276
5281static pm_local_variable_target_node_t *
5282pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5283 pm_refute_numbered_parameter(parser, location->start, location->end);
5284 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5285
5286 *node = (pm_local_variable_target_node_t) {
5287 .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
5288 .name = name,
5289 .depth = depth
5290 };
5291
5292 return node;
5293}
5294
5298static pm_match_predicate_node_t *
5299pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5300 pm_assert_value_expression(parser, value);
5301
5302 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5303
5304 *node = (pm_match_predicate_node_t) {
5305 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
5306 .value = value,
5307 .pattern = pattern,
5308 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5309 };
5310
5311 return node;
5312}
5313
5317static pm_match_required_node_t *
5318pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5319 pm_assert_value_expression(parser, value);
5320
5321 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5322
5323 *node = (pm_match_required_node_t) {
5324 .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
5325 .value = value,
5326 .pattern = pattern,
5327 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5328 };
5329
5330 return node;
5331}
5332
5336static pm_match_write_node_t *
5337pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5338 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5339
5340 *node = (pm_match_write_node_t) {
5341 .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
5342 .call = call,
5343 .targets = { 0 }
5344 };
5345
5346 return node;
5347}
5348
5352static pm_module_node_t *
5353pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5354 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5355
5356 *node = (pm_module_node_t) {
5357 .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
5358 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5359 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5360 .constant_path = constant_path,
5361 .body = body,
5362 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5363 .name = pm_parser_constant_id_token(parser, name)
5364 };
5365
5366 return node;
5367}
5368
5372static pm_multi_target_node_t *
5373pm_multi_target_node_create(pm_parser_t *parser) {
5374 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5375
5376 *node = (pm_multi_target_node_t) {
5377 .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
5378 .lefts = { 0 },
5379 .rest = NULL,
5380 .rights = { 0 },
5381 .lparen_loc = { 0 },
5382 .rparen_loc = { 0 }
5383 };
5384
5385 return node;
5386}
5387
5391static void
5392pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5393 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5394 if (node->rest == NULL) {
5395 node->rest = target;
5396 } else {
5397 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5398 pm_node_list_append(&node->rights, target);
5399 }
5400 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5401 if (node->rest == NULL) {
5402 node->rest = target;
5403 } else {
5404 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5405 pm_node_list_append(&node->rights, target);
5406 }
5407 } else if (node->rest == NULL) {
5408 pm_node_list_append(&node->lefts, target);
5409 } else {
5410 pm_node_list_append(&node->rights, target);
5411 }
5412
5413 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5414 node->base.location.start = target->location.start;
5415 }
5416
5417 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
5418 node->base.location.end = target->location.end;
5419 }
5420}
5421
5425static void
5426pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
5427 node->base.location.start = lparen->start;
5428 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
5429}
5430
5434static void
5435pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
5436 node->base.location.end = rparen->end;
5437 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
5438}
5439
5443static pm_multi_write_node_t *
5444pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5445 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
5446 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5447
5448 *node = (pm_multi_write_node_t) {
5449 .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
5450 .lefts = target->lefts,
5451 .rest = target->rest,
5452 .rights = target->rights,
5453 .lparen_loc = target->lparen_loc,
5454 .rparen_loc = target->rparen_loc,
5455 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5456 .value = value
5457 };
5458
5459 // Explicitly do not call pm_node_destroy here because we want to keep
5460 // around all of the information within the MultiWriteNode node.
5461 xfree(target);
5462
5463 return node;
5464}
5465
5469static pm_next_node_t *
5470pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5471 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5472 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
5473
5474 *node = (pm_next_node_t) {
5475 .base = (
5476 (arguments == NULL)
5477 ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
5478 : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
5479 ),
5480 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5481 .arguments = arguments
5482 };
5483
5484 return node;
5485}
5486
5490static pm_nil_node_t *
5491pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5492 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5493 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
5494
5495 *node = (pm_nil_node_t) {
5496 .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
5497 };
5498
5499 return node;
5500}
5501
5505static pm_no_keywords_parameter_node_t *
5506pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5507 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5508 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5509 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
5510
5511 *node = (pm_no_keywords_parameter_node_t) {
5512 .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
5513 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5514 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
5515 };
5516
5517 return node;
5518}
5519
5523static pm_numbered_parameters_node_t *
5524pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
5525 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
5526
5527 *node = (pm_numbered_parameters_node_t) {
5528 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
5529 .maximum = maximum
5530 };
5531
5532 return node;
5533}
5534
5539#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5540
5547static uint32_t
5548pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5549 const uint8_t *start = token->start + 1;
5550 const uint8_t *end = token->end;
5551
5552 ptrdiff_t diff = end - start;
5553 assert(diff > 0);
5554#if PTRDIFF_MAX > SIZE_MAX
5555 assert(diff < (ptrdiff_t) SIZE_MAX);
5556#endif
5557 size_t length = (size_t) diff;
5558
5559 char *digits = xcalloc(length + 1, sizeof(char));
5560 memcpy(digits, start, length);
5561 digits[length] = '\0';
5562
5563 char *endptr;
5564 errno = 0;
5565 unsigned long value = strtoul(digits, &endptr, 10);
5566
5567 if ((digits == endptr) || (*endptr != '\0')) {
5568 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
5569 value = 0;
5570 }
5571
5572 xfree(digits);
5573
5574 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5575 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5576 value = 0;
5577 }
5578
5579 return (uint32_t) value;
5580}
5581
5582#undef NTH_REF_MAX
5583
5587static pm_numbered_reference_read_node_t *
5588pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5589 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5590 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
5591
5592 *node = (pm_numbered_reference_read_node_t) {
5593 .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
5594 .number = pm_numbered_reference_read_node_number(parser, name)
5595 };
5596
5597 return node;
5598}
5599
5603static pm_optional_parameter_node_t *
5604pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5605 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
5606
5607 *node = (pm_optional_parameter_node_t) {
5608 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
5609 .name = pm_parser_constant_id_token(parser, name),
5610 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5611 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5612 .value = value
5613 };
5614
5615 return node;
5616}
5617
5621static pm_or_node_t *
5622pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5623 pm_assert_value_expression(parser, left);
5624
5625 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
5626
5627 *node = (pm_or_node_t) {
5628 .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
5629 .left = left,
5630 .right = right,
5631 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5632 };
5633
5634 return node;
5635}
5636
5640static pm_parameters_node_t *
5641pm_parameters_node_create(pm_parser_t *parser) {
5642 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
5643
5644 *node = (pm_parameters_node_t) {
5645 .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
5646 .rest = NULL,
5647 .keyword_rest = NULL,
5648 .block = NULL,
5649 .requireds = { 0 },
5650 .optionals = { 0 },
5651 .posts = { 0 },
5652 .keywords = { 0 }
5653 };
5654
5655 return node;
5656}
5657
5661static void
5662pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5663 if (params->base.location.start == NULL) {
5664 params->base.location.start = param->location.start;
5665 } else {
5666 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
5667 }
5668
5669 if (params->base.location.end == NULL) {
5670 params->base.location.end = param->location.end;
5671 } else {
5672 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
5673 }
5674}
5675
5679static void
5680pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
5681 pm_parameters_node_location_set(params, param);
5682 pm_node_list_append(&params->requireds, param);
5683}
5684
5688static void
5689pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5690 pm_parameters_node_location_set(params, UP(param));
5691 pm_node_list_append(&params->optionals, UP(param));
5692}
5693
5697static void
5698pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
5699 pm_parameters_node_location_set(params, param);
5700 pm_node_list_append(&params->posts, param);
5701}
5702
5706static void
5707pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5708 pm_parameters_node_location_set(params, param);
5709 params->rest = param;
5710}
5711
5715static void
5716pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
5717 pm_parameters_node_location_set(params, param);
5718 pm_node_list_append(&params->keywords, param);
5719}
5720
5724static void
5725pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5726 assert(params->keyword_rest == NULL);
5727 pm_parameters_node_location_set(params, param);
5728 params->keyword_rest = param;
5729}
5730
5734static void
5735pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
5736 assert(params->block == NULL);
5737 pm_parameters_node_location_set(params, UP(param));
5738 params->block = param;
5739}
5740
5744static pm_program_node_t *
5745pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5746 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
5747
5748 *node = (pm_program_node_t) {
5749 .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
5750 .locals = *locals,
5751 .statements = statements
5752 };
5753
5754 return node;
5755}
5756
5760static pm_parentheses_node_t *
5761pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5762 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
5763
5764 *node = (pm_parentheses_node_t) {
5765 .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
5766 .body = body,
5767 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5768 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5769 };
5770
5771 return node;
5772}
5773
5777static pm_pinned_expression_node_t *
5778pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5779 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
5780
5781 *node = (pm_pinned_expression_node_t) {
5782 .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
5783 .expression = expression,
5784 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5785 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
5786 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
5787 };
5788
5789 return node;
5790}
5791
5795static pm_pinned_variable_node_t *
5796pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5797 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
5798
5799 *node = (pm_pinned_variable_node_t) {
5800 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
5801 .variable = variable,
5802 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5803 };
5804
5805 return node;
5806}
5807
5811static pm_post_execution_node_t *
5812pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5813 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
5814
5815 *node = (pm_post_execution_node_t) {
5816 .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
5817 .statements = statements,
5818 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5819 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5820 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5821 };
5822
5823 return node;
5824}
5825
5829static pm_pre_execution_node_t *
5830pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5831 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
5832
5833 *node = (pm_pre_execution_node_t) {
5834 .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
5835 .statements = statements,
5836 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5837 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5838 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
5839 };
5840
5841 return node;
5842}
5843
5847static pm_range_node_t *
5848pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5849 pm_assert_value_expression(parser, left);
5850 pm_assert_value_expression(parser, right);
5851
5852 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
5853 pm_node_flags_t flags = 0;
5854
5855 // Indicate that this node is an exclusive range if the operator is `...`.
5856 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5857 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5858 }
5859
5860 // Indicate that this node is a static literal (i.e., can be compiled with
5861 // a putobject in CRuby) if the left and right are implicit nil, explicit
5862 // nil, or integers.
5863 if (
5864 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5865 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5866 ) {
5867 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5868 }
5869
5870 *node = (pm_range_node_t) {
5871 .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)),
5872 .left = left,
5873 .right = right,
5874 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5875 };
5876
5877 return node;
5878}
5879
5883static pm_redo_node_t *
5884pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5885 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5886 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
5887
5888 *node = (pm_redo_node_t) {
5889 .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
5890 };
5891
5892 return node;
5893}
5894
5899static pm_regular_expression_node_t *
5900pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5901 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
5902 pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
5903
5904 *node = (pm_regular_expression_node_t) {
5905 .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
5906 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5907 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5908 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5909 .unescaped = *unescaped
5910 };
5911
5912 return node;
5913}
5914
5918static inline pm_regular_expression_node_t *
5919pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5920 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5921}
5922
5926static pm_required_parameter_node_t *
5927pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5928 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
5929
5930 *node = (pm_required_parameter_node_t) {
5931 .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
5932 .name = pm_parser_constant_id_token(parser, token)
5933 };
5934
5935 return node;
5936}
5937
5941static pm_rescue_modifier_node_t *
5942pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5943 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
5944
5945 *node = (pm_rescue_modifier_node_t) {
5946 .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
5947 .expression = expression,
5948 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5949 .rescue_expression = rescue_expression
5950 };
5951
5952 return node;
5953}
5954
5958static pm_rescue_node_t *
5959pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5960 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
5961
5962 *node = (pm_rescue_node_t) {
5963 .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
5964 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5965 .operator_loc = { 0 },
5966 .then_keyword_loc = { 0 },
5967 .reference = NULL,
5968 .statements = NULL,
5969 .subsequent = NULL,
5970 .exceptions = { 0 }
5971 };
5972
5973 return node;
5974}
5975
5976static inline void
5977pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
5978 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
5979}
5980
5984static void
5985pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5986 node->reference = reference;
5987 node->base.location.end = reference->location.end;
5988}
5989
5993static void
5994pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
5995 node->statements = statements;
5996 if (pm_statements_node_body_length(statements) > 0) {
5997 node->base.location.end = statements->base.location.end;
5998 }
5999}
6000
6004static void
6005pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6006 node->subsequent = subsequent;
6007 node->base.location.end = subsequent->base.location.end;
6008}
6009
6013static void
6014pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6015 pm_node_list_append(&node->exceptions, exception);
6016 node->base.location.end = exception->location.end;
6017}
6018
6022static pm_rest_parameter_node_t *
6023pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6024 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6025
6026 *node = (pm_rest_parameter_node_t) {
6027 .base = (
6028 (name->type == PM_TOKEN_NOT_PROVIDED)
6029 ? PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
6030 : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
6031 ),
6032 .name = pm_parser_optional_constant_id_token(parser, name),
6033 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6034 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6035 };
6036
6037 return node;
6038}
6039
6043static pm_retry_node_t *
6044pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6045 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6046 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6047
6048 *node = (pm_retry_node_t) {
6049 .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
6050 };
6051
6052 return node;
6053}
6054
6058static pm_return_node_t *
6059pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6060 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6061
6062 *node = (pm_return_node_t) {
6063 .base = (
6064 (arguments == NULL)
6065 ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
6066 : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
6067 ),
6068 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6069 .arguments = arguments
6070 };
6071
6072 return node;
6073}
6074
6078static pm_self_node_t *
6079pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6080 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6081 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6082
6083 *node = (pm_self_node_t) {
6084 .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
6085 };
6086
6087 return node;
6088}
6089
6093static pm_shareable_constant_node_t *
6094pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6095 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6096
6097 *node = (pm_shareable_constant_node_t) {
6098 .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
6099 .write = write
6100 };
6101
6102 return node;
6103}
6104
6108static pm_singleton_class_node_t *
6109pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6110 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6111
6112 *node = (pm_singleton_class_node_t) {
6113 .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
6114 .locals = *locals,
6115 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6116 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6117 .expression = expression,
6118 .body = body,
6119 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6120 };
6121
6122 return node;
6123}
6124
6128static pm_source_encoding_node_t *
6129pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6130 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6131 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6132
6133 *node = (pm_source_encoding_node_t) {
6134 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6135 };
6136
6137 return node;
6138}
6139
6143static pm_source_file_node_t*
6144pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6145 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6146 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6147
6148 pm_node_flags_t flags = 0;
6149
6150 switch (parser->frozen_string_literal) {
6151 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6152 flags |= PM_STRING_FLAGS_MUTABLE;
6153 break;
6154 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6155 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6156 break;
6157 }
6158
6159 *node = (pm_source_file_node_t) {
6160 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
6161 .filepath = parser->filepath
6162 };
6163
6164 return node;
6165}
6166
6170static pm_source_line_node_t *
6171pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6172 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6173 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6174
6175 *node = (pm_source_line_node_t) {
6176 .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6177 };
6178
6179 return node;
6180}
6181
6185static pm_splat_node_t *
6186pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6187 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6188
6189 *node = (pm_splat_node_t) {
6190 .base = (
6191 (expression == NULL)
6192 ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
6193 : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
6194 ),
6195 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6196 .expression = expression
6197 };
6198
6199 return node;
6200}
6201
6205static pm_statements_node_t *
6206pm_statements_node_create(pm_parser_t *parser) {
6207 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6208
6209 *node = (pm_statements_node_t) {
6210 .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
6211 .body = { 0 }
6212 };
6213
6214 return node;
6215}
6216
6220static size_t
6221pm_statements_node_body_length(pm_statements_node_t *node) {
6222 return node && node->body.size;
6223}
6224
6228static void
6229pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6230 node->base.location = (pm_location_t) { .start = start, .end = end };
6231}
6232
6237static inline void
6238pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6239 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6240 node->base.location.start = statement->location.start;
6241 }
6242
6243 if (statement->location.end > node->base.location.end) {
6244 node->base.location.end = statement->location.end;
6245 }
6246}
6247
6251static void
6252pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6253 pm_statements_node_body_update(node, statement);
6254
6255 if (node->body.size > 0) {
6256 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6257
6258 switch (PM_NODE_TYPE(previous)) {
6259 case PM_BREAK_NODE:
6260 case PM_NEXT_NODE:
6261 case PM_REDO_NODE:
6262 case PM_RETRY_NODE:
6263 case PM_RETURN_NODE:
6264 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6265 break;
6266 default:
6267 break;
6268 }
6269 }
6270
6271 pm_node_list_append(&node->body, statement);
6272 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6273}
6274
6278static void
6279pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
6280 pm_statements_node_body_update(node, statement);
6281 pm_node_list_prepend(&node->body, statement);
6282 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6283}
6284
6288static inline pm_string_node_t *
6289pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6290 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
6291 pm_node_flags_t flags = 0;
6292
6293 switch (parser->frozen_string_literal) {
6294 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6295 flags = PM_STRING_FLAGS_MUTABLE;
6296 break;
6297 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6298 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6299 break;
6300 }
6301
6302 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start);
6303 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end);
6304
6305 *node = (pm_string_node_t) {
6306 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
6307 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6308 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6309 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6310 .unescaped = *string
6311 };
6312
6313 return node;
6314}
6315
6319static pm_string_node_t *
6320pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6321 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6322}
6323
6328static pm_string_node_t *
6329pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6330 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6331 parser->current_string = PM_STRING_EMPTY;
6332 return node;
6333}
6334
6338static pm_super_node_t *
6339pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6340 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6341 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
6342
6343 const uint8_t *end = pm_arguments_end(arguments);
6344 if (end == NULL) {
6345 assert(false && "unreachable");
6346 }
6347
6348 *node = (pm_super_node_t) {
6349 .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
6350 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6351 .lparen_loc = arguments->opening_loc,
6352 .arguments = arguments->arguments,
6353 .rparen_loc = arguments->closing_loc,
6354 .block = arguments->block
6355 };
6356
6357 return node;
6358}
6359
6364static bool
6365pm_ascii_only_p(const pm_string_t *contents) {
6366 const size_t length = pm_string_length(contents);
6367 const uint8_t *source = pm_string_source(contents);
6368
6369 for (size_t index = 0; index < length; index++) {
6370 if (source[index] & 0x80) return false;
6371 }
6372
6373 return true;
6374}
6375
6379static void
6380parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6381 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6382 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6383
6384 if (width == 0) {
6385 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6386 break;
6387 }
6388
6389 cursor += width;
6390 }
6391}
6392
6397static void
6398parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6399 const pm_encoding_t *encoding = parser->encoding;
6400
6401 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6402 size_t width = encoding->char_width(cursor, end - cursor);
6403
6404 if (width == 0) {
6405 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
6406 break;
6407 }
6408
6409 cursor += width;
6410 }
6411}
6412
6422static inline pm_node_flags_t
6423parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6424 if (parser->explicit_encoding != NULL) {
6425 // A Symbol may optionally have its encoding explicitly set. This will
6426 // happen if an escape sequence results in a non-ASCII code point.
6427 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6428 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6429 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6430 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6431 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6432 } else if (validate) {
6433 parse_symbol_encoding_validate_other(parser, location, contents);
6434 }
6435 } else if (pm_ascii_only_p(contents)) {
6436 // Ruby stipulates that all source files must use an ASCII-compatible
6437 // encoding. Thus, all symbols appearing in source are eligible for
6438 // "downgrading" to US-ASCII.
6439 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6440 } else if (validate) {
6441 parse_symbol_encoding_validate_other(parser, location, contents);
6442 }
6443
6444 return 0;
6445}
6446
6447static pm_node_flags_t
6448parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6449 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6450 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6451 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6452 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6453
6454 // There's special validation logic used if a string does not contain any character escape sequences.
6455 if (parser->explicit_encoding == NULL) {
6456 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6457 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6458 // the US-ASCII encoding.
6459 if (ascii_only) {
6460 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6461 }
6462
6463 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6464 if (!ascii_only) {
6465 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6466 }
6467 } else if (parser->encoding != modifier_encoding) {
6468 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6469
6470 if (modifier == 'n' && !ascii_only) {
6471 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6472 }
6473 }
6474
6475 return flags;
6476 }
6477
6478 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
6479 bool mixed_encoding = false;
6480
6481 if (mixed_encoding) {
6482 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6483 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6484 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
6485 bool valid_string_in_modifier_encoding = true;
6486
6487 if (!valid_string_in_modifier_encoding) {
6488 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6489 }
6490 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6491 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6492 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6493 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6494 }
6495 }
6496
6497 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
6498 return flags;
6499}
6500
6507static pm_node_flags_t
6508parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6509 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
6510 bool valid_unicode_range = true;
6511 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6512 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6513 return flags;
6514 }
6515
6516 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6517 // to multi-byte characters are allowed.
6518 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6519 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6520 // following error message appearing twice. We do the same for compatibility.
6521 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6522 }
6523
6532 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6533 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6534 }
6535
6536 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6537 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6538 }
6539
6540 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6541 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6542 }
6543
6544 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6545 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6546 }
6547
6548 // At this point no encoding modifiers will be present on the regular expression as they would have already
6549 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6550 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6551 if (ascii_only) {
6552 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6553 }
6554
6555 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6556 // or by specifying a modifier.
6557 //
6558 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6559 if (parser->explicit_encoding != NULL) {
6560 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6561 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6562 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6563 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
6564 }
6565 }
6566
6567 return 0;
6568}
6569
6574static pm_symbol_node_t *
6575pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6576 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6577
6578 const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start);
6579 const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end);
6580
6581 *node = (pm_symbol_node_t) {
6582 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
6583 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6584 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
6585 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6586 .unescaped = *unescaped
6587 };
6588
6589 return node;
6590}
6591
6595static inline pm_symbol_node_t *
6596pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6597 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6598}
6599
6603static pm_symbol_node_t *
6604pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6605 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6606 parser->current_string = PM_STRING_EMPTY;
6607 return node;
6608}
6609
6613static pm_symbol_node_t *
6614pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6615 pm_symbol_node_t *node;
6616
6617 switch (token->type) {
6618 case PM_TOKEN_LABEL: {
6619 pm_token_t opening = not_provided(parser);
6620 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6621
6622 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6623 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6624
6625 assert((label.end - label.start) >= 0);
6626 pm_string_shared_init(&node->unescaped, label.start, label.end);
6627 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6628
6629 break;
6630 }
6631 case PM_TOKEN_MISSING: {
6632 pm_token_t opening = not_provided(parser);
6633 pm_token_t closing = not_provided(parser);
6634
6635 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
6636 node = pm_symbol_node_create(parser, &opening, &label, &closing);
6637 break;
6638 }
6639 default:
6640 assert(false && "unreachable");
6641 node = NULL;
6642 break;
6643 }
6644
6645 return node;
6646}
6647
6651static pm_symbol_node_t *
6652pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6653 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6654
6655 *node = (pm_symbol_node_t) {
6656 .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
6657 .value_loc = PM_LOCATION_NULL_VALUE(parser),
6658 .unescaped = { 0 }
6659 };
6660
6661 pm_string_constant_init(&node->unescaped, content, strlen(content));
6662 return node;
6663}
6664
6668static bool
6669pm_symbol_node_label_p(pm_node_t *node) {
6670 const uint8_t *end = NULL;
6671
6672 switch (PM_NODE_TYPE(node)) {
6673 case PM_SYMBOL_NODE:
6674 end = ((pm_symbol_node_t *) node)->closing_loc.end;
6675 break;
6676 case PM_INTERPOLATED_SYMBOL_NODE:
6677 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
6678 break;
6679 default:
6680 return false;
6681 }
6682
6683 return (end != NULL) && (end[-1] == ':');
6684}
6685
6689static pm_symbol_node_t *
6690pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6691 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6692
6693 *new_node = (pm_symbol_node_t) {
6694 .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
6695 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
6696 .value_loc = node->content_loc,
6697 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6698 .unescaped = node->unescaped
6699 };
6700
6701 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
6702 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6703
6704 // We are explicitly _not_ using pm_node_destroy here because we don't want
6705 // to trash the unescaped string. We could instead copy the string if we
6706 // know that it is owned, but we're taking the fast path for now.
6707 xfree(node);
6708
6709 return new_node;
6710}
6711
6715static pm_string_node_t *
6716pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6717 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
6718 pm_node_flags_t flags = 0;
6719
6720 switch (parser->frozen_string_literal) {
6721 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6722 flags = PM_STRING_FLAGS_MUTABLE;
6723 break;
6724 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6725 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6726 break;
6727 }
6728
6729 *new_node = (pm_string_node_t) {
6730 .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
6731 .opening_loc = node->opening_loc,
6732 .content_loc = node->value_loc,
6733 .closing_loc = node->closing_loc,
6734 .unescaped = node->unescaped
6735 };
6736
6737 // We are explicitly _not_ using pm_node_destroy here because we don't want
6738 // to trash the unescaped string. We could instead copy the string if we
6739 // know that it is owned, but we're taking the fast path for now.
6740 xfree(node);
6741
6742 return new_node;
6743}
6744
6748static pm_true_node_t *
6749pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6750 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6751 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6752
6753 *node = (pm_true_node_t) {
6754 .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
6755 };
6756
6757 return node;
6758}
6759
6763static pm_true_node_t *
6764pm_true_node_synthesized_create(pm_parser_t *parser) {
6765 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6766
6767 *node = (pm_true_node_t) {
6768 .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
6769 };
6770
6771 return node;
6772}
6773
6777static pm_undef_node_t *
6778pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6779 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6780 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
6781
6782 *node = (pm_undef_node_t) {
6783 .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
6784 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
6785 .names = { 0 }
6786 };
6787
6788 return node;
6789}
6790
6794static void
6795pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
6796 node->base.location.end = name->location.end;
6797 pm_node_list_append(&node->names, name);
6798}
6799
6803static pm_unless_node_t *
6804pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6805 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6806
6807 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6808 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6809
6810 *node = (pm_unless_node_t) {
6811 .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
6812 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6813 .predicate = predicate,
6814 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
6815 .statements = statements,
6816 .else_clause = NULL,
6817 .end_keyword_loc = { 0 }
6818 };
6819
6820 return node;
6821}
6822
6826static pm_unless_node_t *
6827pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6828 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6829 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6830
6831 pm_statements_node_t *statements = pm_statements_node_create(parser);
6832 pm_statements_node_body_append(parser, statements, statement, true);
6833
6834 *node = (pm_unless_node_t) {
6835 .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
6836 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
6837 .predicate = predicate,
6838 .then_keyword_loc = { 0 },
6839 .statements = statements,
6840 .else_clause = NULL,
6841 .end_keyword_loc = { 0 }
6842 };
6843
6844 return node;
6845}
6846
6847static inline void
6848pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
6849 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
6850 node->base.location.end = end_keyword->end;
6851}
6852
6858static void
6859pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6860 assert(parser->current_block_exits != NULL);
6861
6862 // All of the block exits that we want to remove should be within the
6863 // statements, and since we are modifying the statements, we shouldn't have
6864 // to check the end location.
6865 const uint8_t *start = statements->base.location.start;
6866
6867 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6868 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6869 if (block_exit->location.start < start) break;
6870
6871 // Implicitly remove from the list by lowering the size.
6872 parser->current_block_exits->size--;
6873 }
6874}
6875
6879static pm_until_node_t *
6880pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6881 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6882 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6883
6884 *node = (pm_until_node_t) {
6885 .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
6886 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6887 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6888 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6889 .predicate = predicate,
6890 .statements = statements
6891 };
6892
6893 return node;
6894}
6895
6899static pm_until_node_t *
6900pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6901 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6902 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6903 pm_loop_modifier_block_exits(parser, statements);
6904
6905 *node = (pm_until_node_t) {
6906 .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
6907 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6908 .do_keyword_loc = { 0 },
6909 .closing_loc = { 0 },
6910 .predicate = predicate,
6911 .statements = statements
6912 };
6913
6914 return node;
6915}
6916
6920static pm_when_node_t *
6921pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6922 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
6923
6924 *node = (pm_when_node_t) {
6925 .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
6926 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6927 .statements = NULL,
6928 .then_keyword_loc = { 0 },
6929 .conditions = { 0 }
6930 };
6931
6932 return node;
6933}
6934
6938static void
6939pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
6940 node->base.location.end = condition->location.end;
6941 pm_node_list_append(&node->conditions, condition);
6942}
6943
6947static inline void
6948pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
6949 node->base.location.end = then_keyword->end;
6950 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
6951}
6952
6956static void
6957pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6958 if (statements->base.location.end > node->base.location.end) {
6959 node->base.location.end = statements->base.location.end;
6960 }
6961
6962 node->statements = statements;
6963}
6964
6968static pm_while_node_t *
6969pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6970 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6971 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6972
6973 *node = (pm_while_node_t) {
6974 .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
6975 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6976 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
6977 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
6978 .predicate = predicate,
6979 .statements = statements
6980 };
6981
6982 return node;
6983}
6984
6988static pm_while_node_t *
6989pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6990 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
6991 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6992 pm_loop_modifier_block_exits(parser, statements);
6993
6994 *node = (pm_while_node_t) {
6995 .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
6996 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6997 .do_keyword_loc = { 0 },
6998 .closing_loc = { 0 },
6999 .predicate = predicate,
7000 .statements = statements
7001 };
7002
7003 return node;
7004}
7005
7009static pm_while_node_t *
7010pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7011 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7012
7013 *node = (pm_while_node_t) {
7014 .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
7015 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7016 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7017 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7018 .predicate = predicate,
7019 .statements = statements
7020 };
7021
7022 return node;
7023}
7024
7029static pm_x_string_node_t *
7030pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7031 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7032
7033 *node = (pm_x_string_node_t) {
7034 .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
7035 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7036 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7037 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7038 .unescaped = *unescaped
7039 };
7040
7041 return node;
7042}
7043
7047static inline pm_x_string_node_t *
7048pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7049 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7050}
7051
7055static pm_yield_node_t *
7056pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7057 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7058
7059 const uint8_t *end;
7060 if (rparen_loc->start != NULL) {
7061 end = rparen_loc->end;
7062 } else if (arguments != NULL) {
7063 end = arguments->base.location.end;
7064 } else if (lparen_loc->start != NULL) {
7065 end = lparen_loc->end;
7066 } else {
7067 end = keyword->end;
7068 }
7069
7070 *node = (pm_yield_node_t) {
7071 .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
7072 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7073 .lparen_loc = *lparen_loc,
7074 .arguments = arguments,
7075 .rparen_loc = *rparen_loc
7076 };
7077
7078 return node;
7079}
7080
7085static int
7086pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7087 pm_scope_t *scope = parser->current_scope;
7088 int depth = 0;
7089
7090 while (scope != NULL) {
7091 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7092 if (scope->closed) break;
7093
7094 scope = scope->previous;
7095 depth++;
7096 }
7097
7098 return -1;
7099}
7100
7106static inline int
7107pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7108 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7109}
7110
7114static inline void
7115pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7116 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7117}
7118
7122static pm_constant_id_t
7123pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7124 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7125 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7126 return constant_id;
7127}
7128
7132static inline pm_constant_id_t
7133pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7134 return pm_parser_local_add_location(parser, token->start, token->end, reads);
7135}
7136
7140static pm_constant_id_t
7141pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7142 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7143 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7144 return constant_id;
7145}
7146
7150static pm_constant_id_t
7151pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7152 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7153 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7154 return constant_id;
7155}
7156
7164static bool
7165pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7166 // We want to check whether the parameter name is a numbered parameter or
7167 // not.
7168 pm_refute_numbered_parameter(parser, name->start, name->end);
7169
7170 // Otherwise we'll fetch the constant id for the parameter name and check
7171 // whether it's already in the current scope.
7172 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7173
7174 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7175 // Add an error if the parameter doesn't start with _ and has been seen before
7176 if ((name->start < name->end) && (*name->start != '_')) {
7177 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7178 }
7179 return true;
7180 }
7181 return false;
7182}
7183
7187static void
7188pm_parser_scope_pop(pm_parser_t *parser) {
7189 pm_scope_t *scope = parser->current_scope;
7190 parser->current_scope = scope->previous;
7191 pm_locals_free(&scope->locals);
7192 pm_node_list_free(&scope->implicit_parameters);
7193 xfree(scope);
7194}
7195
7196/******************************************************************************/
7197/* Stack helpers */
7198/******************************************************************************/
7199
7203static inline void
7204pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7205 *stack = (*stack << 1) | (value & 1);
7206}
7207
7211static inline void
7212pm_state_stack_pop(pm_state_stack_t *stack) {
7213 *stack >>= 1;
7214}
7215
7219static inline bool
7220pm_state_stack_p(const pm_state_stack_t *stack) {
7221 return *stack & 1;
7222}
7223
7224static inline void
7225pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7226 // Use the negation of the value to prevent stack overflow.
7227 pm_state_stack_push(&parser->accepts_block_stack, !value);
7228}
7229
7230static inline void
7231pm_accepts_block_stack_pop(pm_parser_t *parser) {
7232 pm_state_stack_pop(&parser->accepts_block_stack);
7233}
7234
7235static inline bool
7236pm_accepts_block_stack_p(pm_parser_t *parser) {
7237 return !pm_state_stack_p(&parser->accepts_block_stack);
7238}
7239
7240static inline void
7241pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7242 pm_state_stack_push(&parser->do_loop_stack, value);
7243}
7244
7245static inline void
7246pm_do_loop_stack_pop(pm_parser_t *parser) {
7247 pm_state_stack_pop(&parser->do_loop_stack);
7248}
7249
7250static inline bool
7251pm_do_loop_stack_p(pm_parser_t *parser) {
7252 return pm_state_stack_p(&parser->do_loop_stack);
7253}
7254
7255/******************************************************************************/
7256/* Lexer check helpers */
7257/******************************************************************************/
7258
7263static inline uint8_t
7264peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7265 if (cursor < parser->end) {
7266 return *cursor;
7267 } else {
7268 return '\0';
7269 }
7270}
7271
7277static inline uint8_t
7278peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7279 return peek_at(parser, parser->current.end + offset);
7280}
7281
7286static inline uint8_t
7287peek(const pm_parser_t *parser) {
7288 return peek_at(parser, parser->current.end);
7289}
7290
7295static inline bool
7296match(pm_parser_t *parser, uint8_t value) {
7297 if (peek(parser) == value) {
7298 parser->current.end++;
7299 return true;
7300 }
7301 return false;
7302}
7303
7308static inline size_t
7309match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7310 if (peek_at(parser, cursor) == '\n') {
7311 return 1;
7312 }
7313 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7314 return 2;
7315 }
7316 return 0;
7317}
7318
7324static inline size_t
7325match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7326 return match_eol_at(parser, parser->current.end + offset);
7327}
7328
7334static inline size_t
7335match_eol(pm_parser_t *parser) {
7336 return match_eol_at(parser, parser->current.end);
7337}
7338
/**
 * Return a pointer to the first "\n" within the `length` bytes starting at
 * the cursor, or NULL when there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // Searching byte-wise with memchr is okay here because none of the
    // supported encodings use \n as a component of a multi-byte character.
    const uint8_t *newline = memchr(cursor, '\n', (size_t) length);
    return newline;
}
7351
7355static inline bool
7356ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7357 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7358}
7359
7364static bool
7365parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7366 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7367
7368 if (encoding != NULL) {
7369 if (parser->encoding != encoding) {
7370 parser->encoding = encoding;
7371 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7372 }
7373
7374 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7375 return true;
7376 }
7377
7378 return false;
7379}
7380
7385static void
7386parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7387 const uint8_t *cursor = parser->current.start + 1;
7388 const uint8_t *end = parser->current.end;
7389
7390 bool separator = false;
7391 while (true) {
7392 if (end - cursor <= 6) return;
7393 switch (cursor[6]) {
7394 case 'C': case 'c': cursor += 6; continue;
7395 case 'O': case 'o': cursor += 5; continue;
7396 case 'D': case 'd': cursor += 4; continue;
7397 case 'I': case 'i': cursor += 3; continue;
7398 case 'N': case 'n': cursor += 2; continue;
7399 case 'G': case 'g': cursor += 1; continue;
7400 case '=': case ':':
7401 separator = true;
7402 cursor += 6;
7403 break;
7404 default:
7405 cursor += 6;
7406 if (pm_char_is_whitespace(*cursor)) break;
7407 continue;
7408 }
7409 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7410 separator = false;
7411 }
7412
7413 while (true) {
7414 do {
7415 if (++cursor >= end) return;
7416 } while (pm_char_is_whitespace(*cursor));
7417
7418 if (separator) break;
7419 if (*cursor != '=' && *cursor != ':') return;
7420
7421 separator = true;
7422 cursor++;
7423 }
7424
7425 const uint8_t *value_start = cursor;
7426 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7427
7428 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7429 // If we were unable to parse the encoding value, then we've got an
7430 // issue because we didn't understand the encoding that the user was
7431 // trying to use. In this case we'll keep using the default encoding but
7432 // add an error to the parser to indicate an unsuccessful parse.
7433 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7434 }
7435}
7436
/**
 * The possible results of interpreting the value of a magic comment as a
 * boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
7442
7447static pm_magic_comment_boolean_value_t
7448parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7449 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7450 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7451 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7452 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7453 } else {
7454 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7455 }
7456}
7457
/**
 * Return true when the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7462
7468static inline const uint8_t *
7469parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7470 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7471 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7472 return cursor;
7473 }
7474 cursor++;
7475 }
7476 return NULL;
7477}
7478
7489static inline bool
7490parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7491 bool result = true;
7492
7493 const uint8_t *start = parser->current.start + 1;
7494 const uint8_t *end = parser->current.end;
7495 if (end - start <= 7) return false;
7496
7497 const uint8_t *cursor;
7498 bool indicator = false;
7499
7500 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7501 start = cursor + 3;
7502
7503 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7504 end = cursor;
7505 indicator = true;
7506 } else {
7507 // If we have a start marker but not an end marker, then we cannot
7508 // have a magic comment.
7509 return false;
7510 }
7511 }
7512
7513 cursor = start;
7514 while (cursor < end) {
7515 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7516
7517 const uint8_t *key_start = cursor;
7518 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7519
7520 const uint8_t *key_end = cursor;
7521 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7522 if (cursor == end) break;
7523
7524 if (*cursor == ':') {
7525 cursor++;
7526 } else {
7527 if (!indicator) return false;
7528 continue;
7529 }
7530
7531 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7532 if (cursor == end) break;
7533
7534 const uint8_t *value_start;
7535 const uint8_t *value_end;
7536
7537 if (*cursor == '"') {
7538 value_start = ++cursor;
7539 for (; cursor < end && *cursor != '"'; cursor++) {
7540 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7541 }
7542 value_end = cursor;
7543 if (cursor < end && *cursor == '"') cursor++;
7544 } else {
7545 value_start = cursor;
7546 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7547 value_end = cursor;
7548 }
7549
7550 if (indicator) {
7551 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7552 } else {
7553 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7554 if (cursor != end) return false;
7555 }
7556
7557 // Here, we need to do some processing on the key to swap out dashes for
7558 // underscores. We only need to do this if there _is_ a dash in the key.
7559 pm_string_t key;
7560 const size_t key_length = (size_t) (key_end - key_start);
7561 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7562
7563 if (dash == NULL) {
7564 pm_string_shared_init(&key, key_start, key_end);
7565 } else {
7566 uint8_t *buffer = xmalloc(key_length);
7567 if (buffer == NULL) break;
7568
7569 memcpy(buffer, key_start, key_length);
7570 buffer[dash - key_start] = '_';
7571
7572 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7573 buffer[dash - key_start] = '_';
7574 }
7575
7576 pm_string_owned_init(&key, buffer, key_length);
7577 }
7578
7579 // Finally, we can start checking the key against the list of known
7580 // magic comment keys, and potentially change state based on that.
7581 const uint8_t *key_source = pm_string_source(&key);
7582 uint32_t value_length = (uint32_t) (value_end - value_start);
7583
7584 // We only want to attempt to compare against encoding comments if it's
7585 // the first line in the file (or the second in the case of a shebang).
7586 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7587 if (
7588 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7589 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7590 ) {
7591 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7592 }
7593 }
7594
7595 if (key_length == 11) {
7596 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7597 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7598 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7599 PM_PARSER_WARN_TOKEN_FORMAT(
7600 parser,
7601 parser->current,
7602 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7603 (int) key_length,
7604 (const char *) key_source,
7605 (int) value_length,
7606 (const char *) value_start
7607 );
7608 break;
7609 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7610 parser->warn_mismatched_indentation = false;
7611 break;
7612 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7613 parser->warn_mismatched_indentation = true;
7614 break;
7615 }
7616 }
7617 } else if (key_length == 21) {
7618 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7619 // We only want to handle frozen string literal comments if it's
7620 // before any semantic tokens have been seen.
7621 if (semantic_token_seen) {
7622 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7623 } else {
7624 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7625 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7626 PM_PARSER_WARN_TOKEN_FORMAT(
7627 parser,
7628 parser->current,
7629 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7630 (int) key_length,
7631 (const char *) key_source,
7632 (int) value_length,
7633 (const char *) value_start
7634 );
7635 break;
7636 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7638 break;
7639 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7641 break;
7642 }
7643 }
7644 }
7645 } else if (key_length == 24) {
7646 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7647 const uint8_t *cursor = parser->current.start;
7648 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7649
7650 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7651 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7652 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7653 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7654 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7655 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7656 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7657 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7658 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7659 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7660 } else {
7661 PM_PARSER_WARN_TOKEN_FORMAT(
7662 parser,
7663 parser->current,
7664 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7665 (int) key_length,
7666 (const char *) key_source,
7667 (int) value_length,
7668 (const char *) value_start
7669 );
7670 }
7671 }
7672 }
7673
7674 // When we're done, we want to free the string in case we had to
7675 // allocate memory for it.
7676 pm_string_free(&key);
7677
7678 // Allocate a new magic comment node to append to the parser's list.
7680 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7681 magic_comment->key_start = key_start;
7682 magic_comment->value_start = value_start;
7683 magic_comment->key_length = (uint32_t) key_length;
7684 magic_comment->value_length = value_length;
7685 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7686 }
7687 }
7688
7689 return result;
7690}
7691
7692/******************************************************************************/
7693/* Context manipulations */
7694/******************************************************************************/
7695
7696static const uint32_t context_terminators[] = {
7697 [PM_CONTEXT_NONE] = 0,
7698 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7699 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7700 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7701 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7702 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7703 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7704 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7705 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7706 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7707 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7708 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7709 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7710 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7711 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7712 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7713 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7714 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7715 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7716 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7717 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7718 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7719 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7720 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7721 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7722 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7723 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7724 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7725 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7726 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7727 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7728 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7729 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7730 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7731 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7732 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7733 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7734 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7735 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7736 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7737 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7738 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7739 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7740 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7741 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7742 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7743 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7744 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7745 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7746 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7747 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7748 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7749 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7750 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7751 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7752};
7753
7754static inline bool
7755context_terminator(pm_context_t context, pm_token_t *token) {
7756 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7757}
7758
7763static pm_context_t
7764context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7765 pm_context_node_t *context_node = parser->current_context;
7766
7767 while (context_node != NULL) {
7768 if (context_terminator(context_node->context, token)) return context_node->context;
7769 context_node = context_node->prev;
7770 }
7771
7772 return PM_CONTEXT_NONE;
7773}
7774
7775static bool
7776context_push(pm_parser_t *parser, pm_context_t context) {
7777 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7778 if (context_node == NULL) return false;
7779
7780 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7781
7782 if (parser->current_context == NULL) {
7783 parser->current_context = context_node;
7784 } else {
7785 context_node->prev = parser->current_context;
7786 parser->current_context = context_node;
7787 }
7788
7789 return true;
7790}
7791
7792static void
7793context_pop(pm_parser_t *parser) {
7794 pm_context_node_t *prev = parser->current_context->prev;
7795 xfree(parser->current_context);
7796 parser->current_context = prev;
7797}
7798
7799static bool
7800context_p(const pm_parser_t *parser, pm_context_t context) {
7801 pm_context_node_t *context_node = parser->current_context;
7802
7803 while (context_node != NULL) {
7804 if (context_node->context == context) return true;
7805 context_node = context_node->prev;
7806 }
7807
7808 return false;
7809}
7810
7811static bool
7812context_def_p(const pm_parser_t *parser) {
7813 pm_context_node_t *context_node = parser->current_context;
7814
7815 while (context_node != NULL) {
7816 switch (context_node->context) {
7817 case PM_CONTEXT_DEF:
7822 return true;
7823 case PM_CONTEXT_CLASS:
7827 case PM_CONTEXT_MODULE:
7831 case PM_CONTEXT_SCLASS:
7835 return false;
7836 default:
7837 context_node = context_node->prev;
7838 }
7839 }
7840
7841 return false;
7842}
7843
7848static const char *
7849context_human(pm_context_t context) {
7850 switch (context) {
7851 case PM_CONTEXT_NONE:
7852 assert(false && "unreachable");
7853 return "";
7854 case PM_CONTEXT_BEGIN: return "begin statement";
7855 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7856 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7857 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7858 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7859 case PM_CONTEXT_CASE_IN: return "'in' clause";
7860 case PM_CONTEXT_CLASS: return "class definition";
7861 case PM_CONTEXT_DEF: return "method definition";
7862 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7863 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7864 case PM_CONTEXT_DEFINED: return "'defined?' expression";
7865 case PM_CONTEXT_ELSE:
7872 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7873 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7874 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7881 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7882 case PM_CONTEXT_FOR: return "for loop";
7883 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7884 case PM_CONTEXT_IF: return "if statement";
7885 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7886 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7887 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7888 case PM_CONTEXT_MAIN: return "top level context";
7889 case PM_CONTEXT_MODULE: return "module definition";
7890 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7891 case PM_CONTEXT_PARENS: return "parentheses";
7892 case PM_CONTEXT_POSTEXE: return "'END' block";
7893 case PM_CONTEXT_PREDICATE: return "predicate";
7894 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7902 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7903 case PM_CONTEXT_SCLASS: return "singleton class definition";
7904 case PM_CONTEXT_TERNARY: return "ternary expression";
7905 case PM_CONTEXT_UNLESS: return "unless statement";
7906 case PM_CONTEXT_UNTIL: return "until statement";
7907 case PM_CONTEXT_WHILE: return "while statement";
7908 }
7909
7910 assert(false && "unreachable");
7911 return "";
7912}
7913
7914/******************************************************************************/
7915/* Specific token lexers */
7916/******************************************************************************/
7917
7918static inline void
7919pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7920 if (invalid != NULL) {
7921 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7922 pm_parser_err(parser, invalid, invalid + 1, diag_id);
7923 }
7924}
7925
7926static size_t
7927pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7928 const uint8_t *invalid = NULL;
7929 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7930 pm_strspn_number_validate(parser, string, length, invalid);
7931 return length;
7932}
7933
7934static size_t
7935pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7936 const uint8_t *invalid = NULL;
7937 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7938 pm_strspn_number_validate(parser, string, length, invalid);
7939 return length;
7940}
7941
7942static size_t
7943pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7944 const uint8_t *invalid = NULL;
7945 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7946 pm_strspn_number_validate(parser, string, length, invalid);
7947 return length;
7948}
7949
7950static size_t
7951pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7952 const uint8_t *invalid = NULL;
7953 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7954 pm_strspn_number_validate(parser, string, length, invalid);
7955 return length;
7956}
7957
7958static pm_token_type_t
7959lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7960 pm_token_type_t type = PM_TOKEN_INTEGER;
7961
7962 // Here we're going to attempt to parse the optional decimal portion of a
7963 // float. If it's not there, then it's okay and we'll just continue on.
7964 if (peek(parser) == '.') {
7965 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7966 parser->current.end += 2;
7967 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7968 type = PM_TOKEN_FLOAT;
7969 } else {
7970 // If we had a . and then something else, then it's not a float
7971 // suffix on a number it's a method call or something else.
7972 return type;
7973 }
7974 }
7975
7976 // Here we're going to attempt to parse the optional exponent portion of a
7977 // float. If it's not there, it's okay and we'll just continue on.
7978 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7979 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7980 parser->current.end += 2;
7981
7982 if (pm_char_is_decimal_digit(peek(parser))) {
7983 parser->current.end++;
7984 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7985 } else {
7986 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7987 }
7988 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7989 parser->current.end++;
7990 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7991 } else {
7992 return type;
7993 }
7994
7995 *seen_e = true;
7996 type = PM_TOKEN_FLOAT;
7997 }
7998
7999 return type;
8000}
8001
8002static pm_token_type_t
8003lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8004 pm_token_type_t type = PM_TOKEN_INTEGER;
8005 *seen_e = false;
8006
8007 if (peek_offset(parser, -1) == '0') {
8008 switch (*parser->current.end) {
8009 // 0d1111 is a decimal number
8010 case 'd':
8011 case 'D':
8012 parser->current.end++;
8013 if (pm_char_is_decimal_digit(peek(parser))) {
8014 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8015 } else {
8016 match(parser, '_');
8017 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8018 }
8019
8020 break;
8021
8022 // 0b1111 is a binary number
8023 case 'b':
8024 case 'B':
8025 parser->current.end++;
8026 if (pm_char_is_binary_digit(peek(parser))) {
8027 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8028 } else {
8029 match(parser, '_');
8030 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8031 }
8032
8033 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8034 break;
8035
8036 // 0o1111 is an octal number
8037 case 'o':
8038 case 'O':
8039 parser->current.end++;
8040 if (pm_char_is_octal_digit(peek(parser))) {
8041 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8042 } else {
8043 match(parser, '_');
8044 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8045 }
8046
8047 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8048 break;
8049
8050 // 01111 is an octal number
8051 case '_':
8052 case '0':
8053 case '1':
8054 case '2':
8055 case '3':
8056 case '4':
8057 case '5':
8058 case '6':
8059 case '7':
8060 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8061 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8062 break;
8063
8064 // 0x1111 is a hexadecimal number
8065 case 'x':
8066 case 'X':
8067 parser->current.end++;
8068 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8069 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8070 } else {
8071 match(parser, '_');
8072 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8073 }
8074
8075 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8076 break;
8077
8078 // 0.xxx is a float
8079 case '.': {
8080 type = lex_optional_float_suffix(parser, seen_e);
8081 break;
8082 }
8083
8084 // 0exxx is a float
8085 case 'e':
8086 case 'E': {
8087 type = lex_optional_float_suffix(parser, seen_e);
8088 break;
8089 }
8090 }
8091 } else {
8092 // If it didn't start with a 0, then we'll lex as far as we can into a
8093 // decimal number.
8094 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8095
8096 // Afterward, we'll lex as far as we can into an optional float suffix.
8097 type = lex_optional_float_suffix(parser, seen_e);
8098 }
8099
8100 // At this point we have a completed number, but we want to provide the user
8101 // with a good experience if they put an additional .xxx fractional
8102 // component on the end, so we'll check for that here.
8103 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8104 const uint8_t *fraction_start = parser->current.end;
8105 const uint8_t *fraction_end = parser->current.end + 2;
8106 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8107 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8108 }
8109
8110 return type;
8111}
8112
8113static pm_token_type_t
8114lex_numeric(pm_parser_t *parser) {
8115 pm_token_type_t type = PM_TOKEN_INTEGER;
8116 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8117
8118 if (parser->current.end < parser->end) {
8119 bool seen_e = false;
8120 type = lex_numeric_prefix(parser, &seen_e);
8121
8122 const uint8_t *end = parser->current.end;
8123 pm_token_type_t suffix_type = type;
8124
8125 if (type == PM_TOKEN_INTEGER) {
8126 if (match(parser, 'r')) {
8127 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8128
8129 if (match(parser, 'i')) {
8130 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8131 }
8132 } else if (match(parser, 'i')) {
8133 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8134 }
8135 } else {
8136 if (!seen_e && match(parser, 'r')) {
8137 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8138
8139 if (match(parser, 'i')) {
8140 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8141 }
8142 } else if (match(parser, 'i')) {
8143 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8144 }
8145 }
8146
8147 const uint8_t b = peek(parser);
8148 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8149 parser->current.end = end;
8150 } else {
8151 type = suffix_type;
8152 }
8153 }
8154
8155 return type;
8156}
8157
/**
 * Lex the name of a global variable, dispatching on the byte at the cursor
 * (per the comments below, the byte that follows the `$`).
 *
 * Returns PM_TOKEN_GLOBAL_VARIABLE in most cases. The match-related names
 * ($&, $`, $', $+) yield PM_TOKEN_BACK_REFERENCE and $1..$9 (with any further
 * digits) yield PM_TOKEN_NUMBERED_REFERENCE, unless the lexer is in the FNAME
 * state, in which case they are plain global variables too. Malformed names
 * add an error to the parser but still return PM_TOKEN_GLOBAL_VARIABLE.
 */
static pm_token_type_t
lex_global_variable(pm_parser_t *parser) {
    // Nothing is left in the source after the token lexed so far.
    if (parser->current.end >= parser->end) {
        pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
        return PM_TOKEN_GLOBAL_VARIABLE;
    }

    // True if multiple characters are allowed after the declaration of the
    // global variable. Not true when it starts with "$-".
    bool allow_multiple = true;

    switch (*parser->current.end) {
        case '~':  // $~: match-data
        case '*':  // $*: argv
        case '$':  // $$: pid
        case '?':  // $?: last status
        case '!':  // $!: error string
        case '@':  // $@: error position
        case '/':  // $/: input record separator
        case '\\': // $\: output record separator
        case ';':  // $;: field separator
        case ',':  // $,: output field separator
        case '.':  // $.: last read line number
        case '=':  // $=: ignorecase
        case ':':  // $:: load path
        case '<':  // $<: reading filename
        case '>':  // $>: default output handle
        case '\"': // $": already loaded files
            // Single punctuation byte: consume it and we are done.
            parser->current.end++;
            return PM_TOKEN_GLOBAL_VARIABLE;

        case '&':  // $&: last match
        case '`':  // $`: string before last match
        case '\'': // $': string after last match
        case '+':  // $+: string matches last paren.
            parser->current.end++;
            return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;

        case '0': {
            parser->current.end++;
            size_t width;

            if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
                // Swallow the whole trailing identifier so that the error
                // covers all of it.
                do {
                    parser->current.end += width;
                } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);

                // $0 isn't allowed to be followed by anything.
                pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
            }

            return PM_TOKEN_GLOBAL_VARIABLE;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            // Consume the full run of decimal digits.
            parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
            return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;

        case '-':
            // "$-x": consume the dash, then share the identifier handling
            // below, restricted to a single identifier character.
            parser->current.end++;
            allow_multiple = false;
            // fallthrough
        default: {
            size_t width;

            if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
                do {
                    parser->current.end += width;
                } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
            } else if (pm_char_is_whitespace(peek(parser))) {
                // If we get here, then we have a $ followed by whitespace,
                // which is not allowed.
                pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
            } else {
                // If we get here, then we have a $ followed by something that
                // isn't recognized as a global variable. The error spans from
                // the token start through the offending character, but the
                // cursor itself is not advanced past it.
                pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
                const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
                PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
            }

            return PM_TOKEN_GLOBAL_VARIABLE;
        }
    }
}
8252
8265static inline pm_token_type_t
8266lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8267 if (memcmp(current_start, value, vlen) == 0) {
8268 pm_lex_state_t last_state = parser->lex_state;
8269
8270 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8271 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8272 } else {
8273 lex_state_set(parser, state);
8274 if (state == PM_LEX_STATE_BEG) {
8275 parser->command_start = true;
8276 }
8277
8278 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8279 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8280 return modifier_type;
8281 }
8282 }
8283
8284 return type;
8285 }
8286
8287 return PM_TOKEN_EOF;
8288}
8289
/**
 * Lex an identifier-like token starting at parser->current.start and classify
 * it.
 *
 * The cursor is advanced over every identifier character, then optionally over
 * a trailing `!`/`?`, a trailing `=` (in the FNAME state), or a trailing `:`
 * that turns the token into a label. Depending on what was consumed and on the
 * lexer state, the result is PM_TOKEN_LABEL, PM_TOKEN_METHOD_NAME, one of the
 * keyword tokens, PM_TOKEN_CONSTANT, or PM_TOKEN_IDENTIFIER.
 *
 * `previous_command_start` suppresses label recognition when the lexer state
 * has LABEL or ENDFN set (it does not affect the lex_state_arg_p path).
 */
static pm_token_type_t
lex_identifier(pm_parser_t *parser, bool previous_command_start) {
    // Lex as far as we can into the current identifier.
    size_t width;
    const uint8_t *end = parser->end;
    const uint8_t *current_start = parser->current.start;
    const uint8_t *current_end = parser->current.end;
    bool encoding_changed = parser->encoding_changed;

    // Use the parser-aware identifier check when the encoding has changed,
    // and the UTF-8 specific check otherwise.
    if (encoding_changed) {
        while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
            current_end += width;
        }
    } else {
        while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
            current_end += width;
        }
    }
    parser->current.end = current_end;

    // Now cache the length of the identifier so that we can quickly compare it
    // against known keywords.
    width = (size_t) (current_end - current_start);

    if (current_end < end) {
        // A ! or ? is only taken as part of the name when the byte after it is
        // not '=' (so that e.g. "a!=b" keeps its != operator).
        if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
            // First we'll attempt to extend the identifier by a ! or ?. Then we'll
            // check if we're returning the defined? keyword or just an identifier.
            width++;

            if (
                ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
                (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
            ) {
                // If we're in a position where we can accept a : at the end of an
                // identifier, then we'll optionally accept it.
                lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
                (void) match(parser, ':');
                return PM_TOKEN_LABEL;
            }

            // Keywords are not recognized when the state is exactly DOT.
            if (parser->lex_state != PM_LEX_STATE_DOT) {
                if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
                    return PM_TOKEN_KEYWORD_DEFINED;
                }
            }

            return PM_TOKEN_METHOD_NAME;
        }

        // In the FNAME state a trailing '=' may belong to the name. It is not
        // taken when followed by '~' or '>', nor when followed by '=' unless
        // that '=' is itself followed by '>'.
        if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
            // If we're in a position where we can accept a = at the end of an
            // identifier, then we'll optionally accept it.
            return PM_TOKEN_IDENTIFIER;
        }

        if (
            ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
            peek(parser) == ':' && peek_offset(parser, 1) != ':'
        ) {
            // If we're in a position where we can accept a : at the end of an
            // identifier, then we'll optionally accept it.
            lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
            (void) match(parser, ':');
            return PM_TOKEN_LABEL;
        }
    }

    // Keyword recognition, bucketed by identifier length so that only
    // candidates of the right size are compared. lex_keyword returns
    // PM_TOKEN_EOF when the bytes do not match, and updates the lexer state
    // when they do. Skipped entirely when the state is exactly DOT.
    if (parser->lex_state != PM_LEX_STATE_DOT) {
        pm_token_type_t type;
        switch (width) {
            case 2:
                if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
                    // "do" becomes the loop variant when the do-loop stack
                    // says so.
                    if (pm_do_loop_stack_p(parser)) {
                        return PM_TOKEN_KEYWORD_DO_LOOP;
                    }
                    return PM_TOKEN_KEYWORD_DO;
                }

                if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 3:
                if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 4:
                if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 5:
                if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 6:
                if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
                break;
            case 8:
                if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
            case 12:
                if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
                break;
        }
    }

    // Not a keyword: a name whose first character is uppercase is a constant,
    // anything else is a plain identifier.
    if (encoding_changed) {
        return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
    }
    return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
}
8429
8434static bool
8435current_token_starts_line(pm_parser_t *parser) {
8436 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8437}
8438
/**
 * Handle a `#` found at `pound` while lexing string-like content and decide
 * whether it begins interpolation.
 *
 * Possible results:
 *
 * - PM_TOKEN_STRING_CONTENT: either the source ends too soon after the `#`, or
 *   interpolation was found but content has already been consumed before it;
 *   in the latter case the token ends at `pound` so the content is emitted
 *   first.
 * - PM_TOKEN_EMBVAR: `#@`, `#@@`, or `#$` followed by a valid variable name; the
 *   PM_LEX_EMBVAR mode is pushed and the token ends just after the `#`.
 * - PM_TOKEN_EMBEXPR_BEGIN: `#{`; the PM_LEX_EMBEXPR mode is pushed and the
 *   token ends just after the `{`.
 * - PM_TOKEN_NOT_PROVIDED: the `#` is not interpolation; the cursor is moved
 *   just past it and the caller should keep lexing.
 */
static pm_token_type_t
lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
    // If there is no content following this #, then we're at the end of
    // the string and we can safely return string content.
    if (pound + 1 >= parser->end) {
        parser->current.end = pound + 1;
        return PM_TOKEN_STRING_CONTENT;
    }

    // Now we'll check against the character that follows the #. If it constitutes
    // valid interpolation, we'll handle that, otherwise we'll return
    // PM_TOKEN_NOT_PROVIDED.
    switch (pound[1]) {
        case '@': {
            // In this case we may have hit an embedded instance or class variable.
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
                return PM_TOKEN_STRING_CONTENT;
            }

            // If we're looking at a @ and there's another @, then we'll skip past the
            // second @.
            const uint8_t *variable = pound + 2;
            if (*variable == '@' && pound + 3 < parser->end) variable++;

            if (char_is_identifier_start(parser, variable, parser->end - variable)) {
                // At this point we're sure that we've either hit an embedded instance
                // or class variable. In this case we'll first need to check if we've
                // already consumed content.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                    return PM_TOKEN_STRING_CONTENT;
                }

                // Otherwise we need to return the embedded variable token
                // and then switch to the embedded variable lex mode.
                lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return PM_TOKEN_EMBVAR;
            }

            // If we didn't get a valid interpolation, then this is just regular
            // string content. This is like if we get "#@-". In this case the caller
            // should keep lexing.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
        }
        case '$':
            // In this case we may have hit an embedded global variable. If there's
            // not enough room, then we'll just return string content.
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
                return PM_TOKEN_STRING_CONTENT;
            }

            // This is the character that we're going to check to see if it is the
            // start of an identifier that would indicate that this is a global
            // variable.
            const uint8_t *check = pound + 2;

            if (pound[2] == '-') {
                // "#$-" needs one more byte after the dash; if the source ends
                // here, everything through the $ is plain string content.
                if (pound + 3 >= parser->end) {
                    parser->current.end = pound + 2;
                    return PM_TOKEN_STRING_CONTENT;
                }

                check++;
            }

            // If the character that we're going to check is the start of an
            // identifier, or we don't have a - and the character is a decimal number
            // or a global name punctuation character, then we've hit an embedded
            // global variable.
            if (
                char_is_identifier_start(parser, check, parser->end - check) ||
                (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
            ) {
                // In this case we've hit an embedded global variable. First check to
                // see if we've already consumed content. If we have, then we need to
                // return that content as string content first.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                    return PM_TOKEN_STRING_CONTENT;
                }

                // Otherwise, we need to return the embedded variable token and switch
                // to the embedded variable lex mode.
                lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return PM_TOKEN_EMBVAR;
            }

            // In this case we've hit a #$ that does not indicate a global variable.
            // In this case we'll continue lexing past it.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
        case '{':
            // In this case it's the start of an embedded expression. If we have
            // already consumed content, then we need to return that content as string
            // content first.
            if (pound > parser->current.start) {
                parser->current.end = pound;
                return PM_TOKEN_STRING_CONTENT;
            }

            parser->enclosure_nesting++;

            // Otherwise we'll skip past the #{ and begin lexing the embedded
            // expression.
            lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
            parser->current.end = pound + 2;
            parser->command_start = true;
            pm_do_loop_stack_push(parser, false);
            return PM_TOKEN_EMBEXPR_BEGIN;
        default:
            // In this case we've hit a # that doesn't constitute interpolation. We'll
            // mark that by returning the not provided token type. This tells the
            // consumer to keep lexing forward.
            parser->current.end = pound + 1;
            return PM_TOKEN_NOT_PROVIDED;
    }
}
8575
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with | and tested with & by the escape_* helpers.

// No special context.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;

// A control escape is in effect (reported as "\C-" in warnings); escape_byte
// masks the byte with 0x9f.
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;

// A meta escape is in effect (reported as "\M-" in warnings); escape_byte sets
// the high bit (0x80) of the byte.
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;

// The escape appears in a character literal; \u escapes then always switch the
// explicit encoding, regardless of the codepoint's value.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;

// The escape appears in a regular expression; escaped bytes are additionally
// written in \xNN form to the regular expression buffer.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
8581
/**
 * Lookup table indexed by a 7-bit ASCII value. An entry is 1 for the bytes
 * 0x09-0x0D and for 0x20-0x7E except the backslash (0x5C); it is 0 for
 * the remaining control bytes, the backslash, and DEL (0x7F).
 */
static const bool ascii_printable_chars[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, // 0x00-0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20-0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 0x50-0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0  // 0x70-0x7F
};

/**
 * Returns true if the byte is below 0x80 and flagged in ascii_printable_chars.
 * Bytes with the high bit set are never considered printable here.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
8600
/**
 * Convert a single hexadecimal digit character into its numeric value.
 *
 * No validation is performed: bytes up to '9' are treated as decimal digits,
 * and any other byte is mapped through its low three bits plus 9, which yields
 * 10-15 for both 'a'-'f' and 'A'-'F'.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    return (uint8_t) ((value & 0x7) + 9);
}
8609
8615static inline uint32_t
8616escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
8617 uint32_t value = 0;
8618 for (size_t index = 0; index < length; index++) {
8619 if (index != 0) value <<= 4;
8620 value |= escape_hexadecimal_digit(string[index]);
8621 }
8622
8623 // Here we're going to verify that the value is actually a valid Unicode
8624 // codepoint and not a surrogate pair.
8625 if (value >= 0xD800 && value <= 0xDFFF) {
8626 if (error_location != NULL) {
8627 pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
8628 } else {
8629 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
8630 }
8631 return 0xFFFD;
8632 }
8633
8634 return value;
8635}
8636
8640static inline uint8_t
8641escape_byte(uint8_t value, const uint8_t flags) {
8642 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8643 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8644 return value;
8645}
8646
8650static inline void
8651escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8652 // \u escape sequences in string-like structures implicitly change the
8653 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8654 // literal.
8655 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8656 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8657 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8658 }
8659
8661 }
8662
8663 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8664 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
8665 pm_buffer_append_byte(buffer, 0xEF);
8666 pm_buffer_append_byte(buffer, 0xBF);
8667 pm_buffer_append_byte(buffer, 0xBD);
8668 }
8669}
8670
8675static inline void
8676escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8677 if (byte >= 0x80) {
8678 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8679 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8680 }
8681
8682 parser->explicit_encoding = parser->encoding;
8683 }
8684
8685 pm_buffer_append_byte(buffer, byte);
8686}
8687
8703static inline void
8704escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8705 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8706 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8707 }
8708
8709 escape_write_byte_encoded(parser, buffer, byte);
8710}
8711
8715static inline void
8716escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8717 size_t width;
8718 if (parser->encoding_changed) {
8719 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8720 } else {
8721 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8722 }
8723
8724 if (width == 1) {
8725 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8726 } else if (width > 1) {
8727 // Valid multibyte character. Just ignore escape.
8728 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8729 pm_buffer_append_bytes(b, parser->current.end, width);
8730 parser->current.end += width;
8731 } else {
8732 // Assume the next character wasn't meant to be part of this escape
8733 // sequence since it is invalid. Add an error and move on.
8734 parser->current.end++;
8735 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8736 }
8737}
8738
8744static void
8745escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8746#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8747
8748 PM_PARSER_WARN_TOKEN_FORMAT(
8749 parser,
8750 parser->current,
8751 PM_WARN_INVALID_CHARACTER,
8752 FLAG(flags),
8753 FLAG(flag),
8754 type
8755 );
8756
8757#undef FLAG
8758}
8759
/**
 * Read one escape sequence, starting at the character that follows the
 * backslash, and write the bytes it denotes.
 *
 * The backslash itself has already been consumed by the caller (for example,
 * lex_question_mark matches it before calling this function, and the recursive
 * calls below advance past it first).
 *
 * @param parser The parser whose current.end cursor is advanced.
 * @param buffer The buffer that receives the unescaped bytes.
 * @param regular_expression_buffer The buffer that receives the text destined
 *     for the regular expression engine. It is only written directly in this
 *     function when PM_ESCAPE_FLAG_REGEXP is set; lex_question_mark passes
 *     NULL here.
 * @param flags A bitset of PM_ESCAPE_FLAG_* values. PM_ESCAPE_FLAG_CONTROL and
 *     PM_ESCAPE_FLAG_META are added as \c, \C- and \M- prefixes are read and
 *     the function recurses. PM_ESCAPE_FLAG_SINGLE limits \u{...} to a single
 *     codepoint.
 */
8763static void
8764escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8765 uint8_t peeked = peek(parser);
8766 switch (peeked) {
 // Single-character escapes: consume the character and write the byte it
 // stands for, after passing it through escape_byte together with the flags.
8767 case '\\': {
8768 parser->current.end++;
8769 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8770 return;
8771 }
8772 case '\'': {
8773 parser->current.end++;
8774 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8775 return;
8776 }
8777 case 'a': {
8778 parser->current.end++;
8779 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8780 return;
8781 }
8782 case 'b': {
8783 parser->current.end++;
8784 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8785 return;
8786 }
8787 case 'e': {
8788 parser->current.end++;
8789 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8790 return;
8791 }
8792 case 'f': {
8793 parser->current.end++;
8794 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8795 return;
8796 }
8797 case 'n': {
8798 parser->current.end++;
8799 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8800 return;
8801 }
8802 case 'r': {
8803 parser->current.end++;
8804 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8805 return;
8806 }
8807 case 's': {
8808 parser->current.end++;
8809 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8810 return;
8811 }
8812 case 't': {
8813 parser->current.end++;
8814 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8815 return;
8816 }
8817 case 'v': {
8818 parser->current.end++;
8819 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8820 return;
8821 }
 // Octal escape: one to three octal digits. The accumulator is a uint8_t
 // and each shift is cast back to uint8_t, so bits above the low eight are
 // discarded.
8822 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8823 uint8_t value = (uint8_t) (*parser->current.end - '0');
8824 parser->current.end++;
8825
8826 if (pm_char_is_octal_digit(peek(parser))) {
8827 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8828 parser->current.end++;
8829
8830 if (pm_char_is_octal_digit(peek(parser))) {
8831 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8832 parser->current.end++;
8833 }
8834 }
8835
8836 value = escape_byte(value, flags);
8837 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8838 return;
8839 }
 // Hexadecimal escape: \x followed by one or two hexadecimal digits.
8840 case 'x': {
 // The byte before the cursor is the already-consumed backslash, so
 // start marks the beginning of the whole escape in the source.
8841 const uint8_t *start = parser->current.end - 1;
8842
8843 parser->current.end++;
8844 uint8_t byte = peek(parser);
8845
8846 if (pm_char_is_hexadecimal_digit(byte)) {
8847 uint8_t value = escape_hexadecimal_digit(byte);
8848 parser->current.end++;
8849
8850 byte = peek(parser);
8851 if (pm_char_is_hexadecimal_digit(byte)) {
8852 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8853 parser->current.end++;
8854 }
8855
8856 value = escape_byte(value, flags);
 // For regular expressions: when a control/meta prefix is active
 // the computed byte is re-emitted as a \xNN escape, otherwise the
 // original source text of the escape is copied through unchanged.
8857 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8858 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8859 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8860 } else {
8861 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8862 }
8863 }
8864
8865 escape_write_byte_encoded(parser, buffer, value);
8866 } else {
8867 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8868 }
8869
8870 return;
8871 }
 // Unicode escape: either \u{...} holding a whitespace-separated list of
 // codepoints, or \u followed by exactly four hexadecimal digits.
8872 case 'u': {
 // As in the 'x' case, start points at the already-consumed backslash.
8873 const uint8_t *start = parser->current.end - 1;
8874 parser->current.end++;
8875
8876 if (parser->current.end == parser->end) {
 // The input ended right after "\u". This inner start shadows the
 // outer one; at this point both hold the same address.
8877 const uint8_t *start = parser->current.end - 2;
8878 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8879 } else if (peek(parser) == '{') {
 // Points at the backslash of "\u{"; used for error ranges and for
 // copying the raw text into the regular expression buffer.
8880 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8881 parser->current.end++;
8882
 // Skip leading inline whitespace (and literal "\n" sequences, see
 // below) before the first codepoint.
8883 size_t whitespace;
8884 while (true) {
8885 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8886 parser->current.end += whitespace;
8887 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8888 // This is super hacky, but it gets us nicer error
8889 // messages because we can still pass it off to the
8890 // regular expression engine even if we hit an
8891 // unterminated regular expression.
8892 parser->current.end += 2;
8893 } else {
8894 break;
8895 }
8896 }
8897
 // extra_codepoints_start records where the second codepoint begins
 // so that the "only one codepoint" error can point at the surplus.
8898 const uint8_t *extra_codepoints_start = NULL;
8899 int codepoints_count = 0;
8900
8901 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8902 const uint8_t *unicode_start = parser->current.end;
8903 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8904
8905 if (hexadecimal_length > 6) {
8906 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8907 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8908 } else if (hexadecimal_length == 0) {
8909 // there are not hexadecimal characters
8910
8911 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8912 // If this is a regular expression, we are going to
8913 // let the regular expression engine handle this
8914 // error instead of us.
8915 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8916 } else {
8917 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
8918 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8919 }
8920
8921 return;
8922 }
8923
 // Note that an over-long run of digits is reported above but is
 // still consumed and passed to escape_unicode here.
8924 parser->current.end += hexadecimal_length;
8925 codepoints_count++;
8926 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8927 extra_codepoints_start = unicode_start;
8928 }
8929
8930 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8931 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8932
8933 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8934 }
8935
8936 // ?\u{nnnn} character literal should contain only one codepoint
8937 // and cannot be like ?\u{nnnn mmmm}.
8938 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8939 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8940 }
8941
 // The loop above stops at end of input or at '}'. Given that loop
 // condition, the final else branch below appears unreachable.
 // NOTE(review): confirm before relying on it.
8942 if (parser->current.end == parser->end) {
8943 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8944 } else if (peek(parser) == '}') {
8945 parser->current.end++;
8946 } else {
8947 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8948 // If this is a regular expression, we are going to let
8949 // the regular expression engine handle this error
8950 // instead of us.
8951 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8952 } else {
8953 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8954 }
8955 }
8956
 // Regular expressions receive the raw source text of the whole
 // \u{...} escape.
8957 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8958 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8959 }
8960 } else {
 // \uXXXX form: look at no more than four bytes for hex digits.
8961 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8962
8963 if (length == 0) {
8964 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8965 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8966 } else {
 // Shadows the outer start; both hold the same address here
 // because the cursor has not moved since "\u" was consumed.
8967 const uint8_t *start = parser->current.end - 2;
8968 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8969 }
8970 } else if (length == 4) {
8971 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
8972
8973 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8974 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8975 }
8976
8977 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8978 parser->current.end += 4;
8979 } else {
 // Between one and three hex digits: consume them and report the
 // escape as invalid (or defer to the regular expression engine).
8980 parser->current.end += length;
8981
8982 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8983 // If this is a regular expression, we are going to let
8984 // the regular expression engine handle this error
8985 // instead of us.
8986 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8987 } else {
8988 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8989 }
8990 }
8991 }
8992
8993 return;
8994 }
 // Control escape, short form: \c followed by the character to modify.
8995 case 'c': {
8996 parser->current.end++;
 // A second control prefix is reported, but processing continues.
8997 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8998 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8999 }
9000
9001 if (parser->current.end == parser->end) {
9002 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9003 return;
9004 }
9005
9006 uint8_t peeked = peek(parser);
9007 switch (peeked) {
 // \c? writes 0x7f, passed through escape_byte with the flags that
 // were active before this prefix.
9008 case '?': {
9009 parser->current.end++;
9010 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9011 return;
9012 }
 // A nested escape: unicode escapes are rejected, anything else is
 // read recursively with the control flag added.
9013 case '\\':
9014 parser->current.end++;
9015
9016 if (match(parser, 'u') || match(parser, 'U')) {
9017 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9018 return;
9019 }
9020
9021 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9022 return;
 // A literal space or tab is accepted with a warning that names the
 // escape to use instead.
9023 case ' ':
9024 parser->current.end++;
9025 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9026 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9027 return;
9028 case '\t':
9029 parser->current.end++;
9030 escape_read_warn(parser, flags, 0, "\\t");
9031 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9032 return;
9033 default: {
9034 if (!char_is_ascii_printable(peeked)) {
9035 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9036 return;
9037 }
9038
9039 parser->current.end++;
9040 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9041 return;
9042 }
9043 }
9044 }
 // Control escape, long form: \C- followed by the character to modify.
 // Mirrors the 'c' case apart from requiring the '-' and the error ranges.
9045 case 'C': {
9046 parser->current.end++;
9047 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9048 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9049 }
9050
9051 if (peek(parser) != '-') {
 // Extend the error range over the offending character.
9052 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9053 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9054 return;
9055 }
9056
9057 parser->current.end++;
9058 if (parser->current.end == parser->end) {
9059 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9060 return;
9061 }
9062
9063 uint8_t peeked = peek(parser);
9064 switch (peeked) {
9065 case '?': {
9066 parser->current.end++;
9067 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9068 return;
9069 }
9070 case '\\':
9071 parser->current.end++;
9072
9073 if (match(parser, 'u') || match(parser, 'U')) {
9074 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9075 return;
9076 }
9077
9078 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9079 return;
9080 case ' ':
9081 parser->current.end++;
9082 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9083 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9084 return;
9085 case '\t':
9086 parser->current.end++;
9087 escape_read_warn(parser, flags, 0, "\\t");
9088 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9089 return;
9090 default: {
9091 if (!char_is_ascii_printable(peeked)) {
9092 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9093 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9094 return;
9095 }
9096
9097 parser->current.end++;
9098 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9099 return;
9100 }
9101 }
9102 }
 // Meta escape: \M- followed by the character to modify. Unlike the control
 // cases there is no special handling of '?'.
9103 case 'M': {
9104 parser->current.end++;
9105 if (flags & PM_ESCAPE_FLAG_META) {
9106 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9107 }
9108
9109 if (peek(parser) != '-') {
9110 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9111 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9112 return;
9113 }
9114
9115 parser->current.end++;
9116 if (parser->current.end == parser->end) {
9117 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9118 return;
9119 }
9120
9121 uint8_t peeked = peek(parser);
9122 switch (peeked) {
9123 case '\\':
9124 parser->current.end++;
9125
9126 if (match(parser, 'u') || match(parser, 'U')) {
9127 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9128 return;
9129 }
9130
9131 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9132 return;
9133 case ' ':
9134 parser->current.end++;
9135 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9136 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9137 return;
9138 case '\t':
9139 parser->current.end++;
 // The control bit is masked out of the first argument so that the
 // warning's first prefix is never "\\C-" here.
9140 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9141 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9142 return;
9143 default:
9144 if (!char_is_ascii_printable(peeked)) {
9145 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9146 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9147 return;
9148 }
9149
9150 parser->current.end++;
9151 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9152 return;
9153 }
9154 }
 // A backslash followed by "\r\n" consumes both bytes and writes a single
 // '\n' (through escape_byte) to buffer only.
9155 case '\r': {
9156 if (peek_offset(parser, 1) == '\n') {
9157 parser->current.end += 2;
9158 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9159 return;
9160 }
 /* fallthrough: a lone '\r' is handled by the default case below */
9162 }
 // Any other character following the backslash.
9163 default: {
 // NOTE(review): this reports PM_ERR_ESCAPE_INVALID_META even when only
 // the control flag is set; confirm that is intended.
9164 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9165 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9166 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9167 return;
9168 }
9169 if (parser->current.end < parser->end) {
9170 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9171 } else {
9172 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9173 }
9174 return;
9175 }
9176 }
9177}
9178
9204static pm_token_type_t
9205lex_question_mark(pm_parser_t *parser) {
9206 if (lex_state_end_p(parser)) {
9207 lex_state_set(parser, PM_LEX_STATE_BEG);
9208 return PM_TOKEN_QUESTION_MARK;
9209 }
9210
9211 if (parser->current.end >= parser->end) {
9212 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9213 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9214 return PM_TOKEN_CHARACTER_LITERAL;
9215 }
9216
9217 if (pm_char_is_whitespace(*parser->current.end)) {
9218 lex_state_set(parser, PM_LEX_STATE_BEG);
9219 return PM_TOKEN_QUESTION_MARK;
9220 }
9221
9222 lex_state_set(parser, PM_LEX_STATE_BEG);
9223
9224 if (match(parser, '\\')) {
9225 lex_state_set(parser, PM_LEX_STATE_END);
9226
9227 pm_buffer_t buffer;
9228 pm_buffer_init_capacity(&buffer, 3);
9229
9230 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9231 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
9232
9233 return PM_TOKEN_CHARACTER_LITERAL;
9234 } else {
9235 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9236
9237 // Ternary operators can have a ? immediately followed by an identifier
9238 // which starts with an underscore. We check for this case here.
9239 if (
9240 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9241 (
9242 (parser->current.end + encoding_width >= parser->end) ||
9243 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9244 )
9245 ) {
9246 lex_state_set(parser, PM_LEX_STATE_END);
9247 parser->current.end += encoding_width;
9248 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9249 return PM_TOKEN_CHARACTER_LITERAL;
9250 }
9251 }
9252
9253 return PM_TOKEN_QUESTION_MARK;
9254}
9255
9260static pm_token_type_t
9261lex_at_variable(pm_parser_t *parser) {
9262 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9263 const uint8_t *end = parser->end;
9264
9265 size_t width;
9266 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9267 parser->current.end += width;
9268
9269 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9270 parser->current.end += width;
9271 }
9272 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9273 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9274 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9275 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9276 }
9277
9278 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9279 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9280 } else {
9281 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9282 pm_parser_err_token(parser, &parser->current, diag_id);
9283 }
9284
9285 // If we're lexing an embedded variable, then we need to pop back into the
9286 // parent lex context.
9287 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9288 lex_mode_pop(parser);
9289 }
9290
9291 return type;
9292}
9293
9297static inline void
9298parser_lex_callback(pm_parser_t *parser) {
9299 if (parser->lex_callback) {
9300 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9301 }
9302}
9303
9307static inline pm_comment_t *
9308parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9309 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9310 if (comment == NULL) return NULL;
9311
9312 *comment = (pm_comment_t) {
9313 .type = type,
9314 .location = { parser->current.start, parser->current.end }
9315 };
9316
9317 return comment;
9318}
9319
9325static pm_token_type_t
9326lex_embdoc(pm_parser_t *parser) {
9327 // First, lex out the EMBDOC_BEGIN token.
9328 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9329
9330 if (newline == NULL) {
9331 parser->current.end = parser->end;
9332 } else {
9333 pm_newline_list_append(&parser->newline_list, newline);
9334 parser->current.end = newline + 1;
9335 }
9336
9337 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9338 parser_lex_callback(parser);
9339
9340 // Now, create a comment that is going to be attached to the parser.
9341 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9342 if (comment == NULL) return PM_TOKEN_EOF;
9343
9344 // Now, loop until we find the end of the embedded documentation or the end
9345 // of the file.
9346 while (parser->current.end + 4 <= parser->end) {
9347 parser->current.start = parser->current.end;
9348
9349 // If we've hit the end of the embedded documentation then we'll return
9350 // that token here.
9351 if (
9352 (memcmp(parser->current.end, "=end", 4) == 0) &&
9353 (
9354 (parser->current.end + 4 == parser->end) || // end of file
9355 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9356 (parser->current.end[4] == '\0') || // NUL or end of script
9357 (parser->current.end[4] == '\004') || // ^D
9358 (parser->current.end[4] == '\032') // ^Z
9359 )
9360 ) {
9361 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9362
9363 if (newline == NULL) {
9364 parser->current.end = parser->end;
9365 } else {
9366 pm_newline_list_append(&parser->newline_list, newline);
9367 parser->current.end = newline + 1;
9368 }
9369
9370 parser->current.type = PM_TOKEN_EMBDOC_END;
9371 parser_lex_callback(parser);
9372
9373 comment->location.end = parser->current.end;
9374 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9375
9376 return PM_TOKEN_EMBDOC_END;
9377 }
9378
9379 // Otherwise, we'll parse until the end of the line and return a line of
9380 // embedded documentation.
9381 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9382
9383 if (newline == NULL) {
9384 parser->current.end = parser->end;
9385 } else {
9386 pm_newline_list_append(&parser->newline_list, newline);
9387 parser->current.end = newline + 1;
9388 }
9389
9390 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9391 parser_lex_callback(parser);
9392 }
9393
9394 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9395
9396 comment->location.end = parser->current.end;
9397 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9398
9399 return PM_TOKEN_EOF;
9400}
9401
9407static inline void
9408parser_lex_ignored_newline(pm_parser_t *parser) {
9409 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9410 parser_lex_callback(parser);
9411}
9412
9422static inline void
9423parser_flush_heredoc_end(pm_parser_t *parser) {
9424 assert(parser->heredoc_end <= parser->end);
9425 parser->next_start = parser->heredoc_end;
9426 parser->heredoc_end = NULL;
9427}
9428
9432static bool
9433parser_end_of_line_p(const pm_parser_t *parser) {
9434 const uint8_t *cursor = parser->current.end;
9435
9436 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9437 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9438 }
9439
9440 return true;
9441}
9442
9461typedef struct {
9467
9472 const uint8_t *cursor;
9474
9494
9498static inline void
9499pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9500 pm_buffer_append_byte(&token_buffer->buffer, byte);
9501}
9502
9503static inline void
9504pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9505 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9506}
9507
9511static inline size_t
9512parser_char_width(const pm_parser_t *parser) {
9513 size_t width;
9514 if (parser->encoding_changed) {
9515 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9516 } else {
9517 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9518 }
9519
9520 // TODO: If the character is invalid in the given encoding, then we'll just
9521 // push one byte into the buffer. This should actually be an error.
9522 return (width == 0 ? 1 : width);
9523}
9524
9528static void
9529pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9530 size_t width = parser_char_width(parser);
9531 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9532 parser->current.end += width;
9533}
9534
9535static void
9536pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9537 size_t width = parser_char_width(parser);
9538 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9539 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9540 parser->current.end += width;
9541}
9542
/**
 * Returns true when none of the `length` bytes starting at `value` has its
 * high bit set, i.e. every byte is in the 7-bit ASCII range. An empty slice
 * is ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        if (*value++ >= 0x80) return false;
    }

    return true;
}
9551
9558static inline void
9559pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9560 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
9561}
9562
9563static inline void
9564pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9565 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
9566 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9567 pm_buffer_free(&token_buffer->regexp_buffer);
9568}
9569
9579static void
9580pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9581 if (token_buffer->cursor == NULL) {
9582 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9583 } else {
9584 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9585 pm_token_buffer_copy(parser, token_buffer);
9586 }
9587}
9588
9589static void
9590pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9591 if (token_buffer->base.cursor == NULL) {
9592 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9593 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9594 } else {
9595 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9596 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9597 pm_regexp_token_buffer_copy(parser, token_buffer);
9598 }
9599}
9600
9601#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9602
9611static void
9612pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9613 const uint8_t *start;
9614 if (token_buffer->cursor == NULL) {
9615 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9616 start = parser->current.start;
9617 } else {
9618 start = token_buffer->cursor;
9619 }
9620
9621 const uint8_t *end = parser->current.end - 1;
9622 assert(end >= start);
9623 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9624
9625 token_buffer->cursor = end;
9626}
9627
9628static void
9629pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9630 const uint8_t *start;
9631 if (token_buffer->base.cursor == NULL) {
9632 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9633 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9634 start = parser->current.start;
9635 } else {
9636 start = token_buffer->base.cursor;
9637 }
9638
9639 const uint8_t *end = parser->current.end - 1;
9640 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9641 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9642
9643 token_buffer->base.cursor = end;
9644}
9645
9646#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9647
9652static inline size_t
9653pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9654 size_t whitespace = 0;
9655
9656 switch (indent) {
9657 case PM_HEREDOC_INDENT_NONE:
9658 // Do nothing, we can't match a terminator with
9659 // indentation and there's no need to calculate common
9660 // whitespace.
9661 break;
9662 case PM_HEREDOC_INDENT_DASH:
9663 // Skip past inline whitespace.
9664 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9665 break;
9666 case PM_HEREDOC_INDENT_TILDE:
9667 // Skip past inline whitespace and calculate common
9668 // whitespace.
9669 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9670 if (**cursor == '\t') {
9671 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9672 } else {
9673 whitespace++;
9674 }
9675 (*cursor)++;
9676 }
9677
9678 break;
9679 }
9680
9681 return whitespace;
9682}
9683
9688static uint8_t
9689pm_lex_percent_delimiter(pm_parser_t *parser) {
9690 size_t eol_length = match_eol(parser);
9691
9692 if (eol_length) {
9693 if (parser->heredoc_end) {
9694 // If we have already lexed a heredoc, then the newline has already
9695 // been added to the list. In this case we want to just flush the
9696 // heredoc end.
9697 parser_flush_heredoc_end(parser);
9698 } else {
9699 // Otherwise, we'll add the newline to the list of newlines.
9700 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
9701 }
9702
9703 uint8_t delimiter = *parser->current.end;
9704
9705 // If our delimiter is \r\n, we want to treat it as if it's \n.
9706 // For example, %\r\nfoo\r\n should be "foo"
9707 if (eol_length == 2) {
9708 delimiter = *(parser->current.end + 1);
9709 }
9710
9711 parser->current.end += eol_length;
9712 return delimiter;
9713 }
9714
9715 return *parser->current.end++;
9716}
9717
/**
 * Set the type of the current token, notify the lex callback, and return from
 * the enclosing function. The invocation supplies the trailing semicolon for
 * the `return`.
 *
 * The expansion is three separate statements that are not wrapped in a block,
 * so LEX(...) must not be used as the unbraced body of an if/else/loop: only
 * the first statement would be governed by the condition.
 */
9722#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9723
9730static void
9731parser_lex(pm_parser_t *parser) {
9732 assert(parser->current.end <= parser->end);
9733 parser->previous = parser->current;
9734
9735 // This value mirrors cmd_state from CRuby.
9736 bool previous_command_start = parser->command_start;
9737 parser->command_start = false;
9738
9739 // This is used to communicate to the newline lexing function that we've
9740 // already seen a comment.
9741 bool lexed_comment = false;
9742
9743 // Here we cache the current value of the semantic token seen flag. This is
9744 // used to reset it in case we find a token that shouldn't flip this flag.
9745 unsigned int semantic_token_seen = parser->semantic_token_seen;
9746 parser->semantic_token_seen = true;
9747
9748 switch (parser->lex_modes.current->mode) {
9749 case PM_LEX_DEFAULT:
9750 case PM_LEX_EMBEXPR:
9751 case PM_LEX_EMBVAR:
9752
9753 // We have a specific named label here because we are going to jump back to
9754 // this location in the event that we have lexed a token that should not be
9755 // returned to the parser. This includes comments, ignored newlines, and
9756 // invalid tokens of some form.
9757 lex_next_token: {
9758 // If we have the special next_start pointer set, then we're going to jump
9759 // to that location and start lexing from there.
9760 if (parser->next_start != NULL) {
9761 parser->current.end = parser->next_start;
9762 parser->next_start = NULL;
9763 }
9764
9765 // This value mirrors space_seen from CRuby. It tracks whether or not
9766 // space has been eaten before the start of the next token.
9767 bool space_seen = false;
9768
9769 // First, we're going to skip past any whitespace at the front of the next
9770 // token.
9771 bool chomping = true;
9772 while (parser->current.end < parser->end && chomping) {
9773 switch (*parser->current.end) {
9774 case ' ':
9775 case '\t':
9776 case '\f':
9777 case '\v':
9778 parser->current.end++;
9779 space_seen = true;
9780 break;
9781 case '\r':
9782 if (match_eol_offset(parser, 1)) {
9783 chomping = false;
9784 } else {
9785 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9786 parser->current.end++;
9787 space_seen = true;
9788 }
9789 break;
9790 case '\\': {
9791 size_t eol_length = match_eol_offset(parser, 1);
9792 if (eol_length) {
9793 if (parser->heredoc_end) {
9794 parser->current.end = parser->heredoc_end;
9795 parser->heredoc_end = NULL;
9796 } else {
9797 parser->current.end += eol_length + 1;
9798 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9799 space_seen = true;
9800 }
9801 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9802 parser->current.end += 2;
9803 } else {
9804 chomping = false;
9805 }
9806
9807 break;
9808 }
9809 default:
9810 chomping = false;
9811 break;
9812 }
9813 }
9814
9815 // Next, we'll set to start of this token to be the current end.
9816 parser->current.start = parser->current.end;
9817
9818 // We'll check if we're at the end of the file. If we are, then we
9819 // need to return the EOF token.
9820 if (parser->current.end >= parser->end) {
9821 // If we hit EOF, but the EOF came immediately after a newline,
9822 // set the start of the token to the newline. This way any EOF
9823 // errors will be reported as happening on that line rather than
9824 // a line after. For example "foo(\n" should report an error
9825 // on line 1 even though EOF technically occurs on line 2.
9826 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9827 parser->current.start -= 1;
9828 }
9829 LEX(PM_TOKEN_EOF);
9830 }
9831
9832 // Finally, we'll check the current character to determine the next
9833 // token.
9834 switch (*parser->current.end++) {
9835 case '\0': // NUL or end of script
9836 case '\004': // ^D
9837 case '\032': // ^Z
9838 parser->current.end--;
9839 LEX(PM_TOKEN_EOF);
9840
9841 case '#': { // comments
9842 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9843 parser->current.end = ending == NULL ? parser->end : ending;
9844
9845 // If we found a comment while lexing, then we're going to
9846 // add it to the list of comments in the file and keep
9847 // lexing.
9848 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9849 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9850
9851 if (ending) parser->current.end++;
9852 parser->current.type = PM_TOKEN_COMMENT;
9853 parser_lex_callback(parser);
9854
9855 // Here, parse the comment to see if it's a magic comment
9856 // and potentially change state on the parser.
9857 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9858 ptrdiff_t length = parser->current.end - parser->current.start;
9859
9860 // If we didn't find a magic comment within the first
9861 // pass and we're at the start of the file, then we need
9862 // to do another pass to potentially find other patterns
9863 // for encoding comments.
9864 if (length >= 10 && !parser->encoding_locked) {
9865 parser_lex_magic_comment_encoding(parser);
9866 }
9867 }
9868
9869 lexed_comment = true;
9870 }
9872 case '\r':
9873 case '\n': {
9874 parser->semantic_token_seen = semantic_token_seen & 0x1;
9875 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9876
9877 if (eol_length) {
9878 // The only way you can have carriage returns in this
9879 // particular loop is if you have a carriage return
9880 // followed by a newline. In that case we'll just skip
9881 // over the carriage return and continue lexing, in
9882 // order to make it so that the newline token
9883 // encapsulates both the carriage return and the
9884 // newline. Note that we need to check that we haven't
9885 // already lexed a comment here because that falls
9886 // through into here as well.
9887 if (!lexed_comment) {
9888 parser->current.end += eol_length - 1; // skip CR
9889 }
9890
9891 if (parser->heredoc_end == NULL) {
9892 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9893 }
9894 }
9895
9896 if (parser->heredoc_end) {
9897 parser_flush_heredoc_end(parser);
9898 }
9899
9900 // If this is an ignored newline, then we can continue lexing after
9901 // calling the callback with the ignored newline token.
9902 switch (lex_state_ignored_p(parser)) {
9903 case PM_IGNORED_NEWLINE_NONE:
9904 break;
9905 case PM_IGNORED_NEWLINE_PATTERN:
9906 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9907 if (!lexed_comment) parser_lex_ignored_newline(parser);
9908 lex_state_set(parser, PM_LEX_STATE_BEG);
9909 parser->command_start = true;
9910 parser->current.type = PM_TOKEN_NEWLINE;
9911 return;
9912 }
9914 case PM_IGNORED_NEWLINE_ALL:
9915 if (!lexed_comment) parser_lex_ignored_newline(parser);
9916 lexed_comment = false;
9917 goto lex_next_token;
9918 }
9919
9920 // Here we need to look ahead and see if there is a call operator
9921 // (either . or &.) that starts the next line. If there is, then this
9922 // is going to become an ignored newline and we're going to instead
9923 // return the call operator.
9924 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9925 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9926
9927 if (next_content < parser->end) {
9928 // If we hit a comment after a newline, then we're going to check
9929 // if it's ignored or if it's followed by a method call ('.').
9930 // If it is, then we're going to call the
9931 // callback with an ignored newline and then continue lexing.
9932 // Otherwise we'll return a regular newline.
9933 if (next_content[0] == '#') {
9934 // Here we look for a "." or "&." following a "\n".
9935 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9936
9937 while (following && (following + 1 < parser->end)) {
9938 following++;
9939 following += pm_strspn_inline_whitespace(following, parser->end - following);
9940
9941 // If this is not followed by a comment, then we can break out
9942 // of this loop.
9943 if (peek_at(parser, following) != '#') break;
9944
9945 // If there is a comment, then we need to find the end of the
9946 // comment and continue searching from there.
9947 following = next_newline(following, parser->end - following);
9948 }
9949
9950 // If the lex state was ignored, we will lex the
9951 // ignored newline.
9952 if (lex_state_ignored_p(parser)) {
9953 if (!lexed_comment) parser_lex_ignored_newline(parser);
9954 lexed_comment = false;
9955 goto lex_next_token;
9956 }
9957
9958 // If we hit a '.' or a '&.' we will lex the ignored
9959 // newline.
9960 if (following && (
9961 (peek_at(parser, following) == '.') ||
9962 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9963 )) {
9964 if (!lexed_comment) parser_lex_ignored_newline(parser);
9965 lexed_comment = false;
9966 goto lex_next_token;
9967 }
9968
9969
9970 // If we are parsing as CRuby 4.0 or later and we
9971 // hit a '&&' or a '||' then we will lex the ignored
9972 // newline.
9973 if (
9975 following && (
9976 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9977 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9978 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
9979 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
9980 )
9981 ) {
9982 if (!lexed_comment) parser_lex_ignored_newline(parser);
9983 lexed_comment = false;
9984 goto lex_next_token;
9985 }
9986 }
9987
9988 // If we hit a . after a newline, then we're in a call chain and
9989 // we need to return the call operator.
9990 if (next_content[0] == '.') {
9991 // To match ripper, we need to emit an ignored newline even though
9992 // it's a real newline in the case that we have a beginless range
9993 // on a subsequent line.
9994 if (peek_at(parser, next_content + 1) == '.') {
9995 if (!lexed_comment) parser_lex_ignored_newline(parser);
9996 lex_state_set(parser, PM_LEX_STATE_BEG);
9997 parser->command_start = true;
9998 parser->current.type = PM_TOKEN_NEWLINE;
9999 return;
10000 }
10001
10002 if (!lexed_comment) parser_lex_ignored_newline(parser);
10003 lex_state_set(parser, PM_LEX_STATE_DOT);
10004 parser->current.start = next_content;
10005 parser->current.end = next_content + 1;
10006 parser->next_start = NULL;
10007 LEX(PM_TOKEN_DOT);
10008 }
10009
10010 // If we hit a &. after a newline, then we're in a call chain and
10011 // we need to return the call operator.
10012 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10013 if (!lexed_comment) parser_lex_ignored_newline(parser);
10014 lex_state_set(parser, PM_LEX_STATE_DOT);
10015 parser->current.start = next_content;
10016 parser->current.end = next_content + 2;
10017 parser->next_start = NULL;
10018 LEX(PM_TOKEN_AMPERSAND_DOT);
10019 }
10020
10021 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10022 // If we hit an && then we are in a logical chain
10023 // and we need to return the logical operator.
10024 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10025 if (!lexed_comment) parser_lex_ignored_newline(parser);
10026 lex_state_set(parser, PM_LEX_STATE_BEG);
10027 parser->current.start = next_content;
10028 parser->current.end = next_content + 2;
10029 parser->next_start = NULL;
10030 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10031 }
10032
10033 // If we hit a || then we are in a logical chain and
10034 // we need to return the logical operator.
10035 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10036 if (!lexed_comment) parser_lex_ignored_newline(parser);
10037 lex_state_set(parser, PM_LEX_STATE_BEG);
10038 parser->current.start = next_content;
10039 parser->current.end = next_content + 2;
10040 parser->next_start = NULL;
10041 LEX(PM_TOKEN_PIPE_PIPE);
10042 }
10043
10044 // If we hit an 'and' then we are in a logical chain
10045 // and we need to return the logical operator.
10046 if (
10047 peek_at(parser, next_content) == 'a' &&
10048 peek_at(parser, next_content + 1) == 'n' &&
10049 peek_at(parser, next_content + 2) == 'd' &&
10050 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10051 ) {
10052 if (!lexed_comment) parser_lex_ignored_newline(parser);
10053 lex_state_set(parser, PM_LEX_STATE_BEG);
10054 parser->current.start = next_content;
10055 parser->current.end = next_content + 3;
10056 parser->next_start = NULL;
10057 parser->command_start = true;
10058 LEX(PM_TOKEN_KEYWORD_AND);
10059 }
10060
10061 // If we hit a 'or' then we are in a logical chain
10062 // and we need to return the logical operator.
10063 if (
10064 peek_at(parser, next_content) == 'o' &&
10065 peek_at(parser, next_content + 1) == 'r' &&
10066 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10067 ) {
10068 if (!lexed_comment) parser_lex_ignored_newline(parser);
10069 lex_state_set(parser, PM_LEX_STATE_BEG);
10070 parser->current.start = next_content;
10071 parser->current.end = next_content + 2;
10072 parser->next_start = NULL;
10073 parser->command_start = true;
10074 LEX(PM_TOKEN_KEYWORD_OR);
10075 }
10076 }
10077 }
10078
10079 // At this point we know this is a regular newline, and we can set the
10080 // necessary state and return the token.
10081 lex_state_set(parser, PM_LEX_STATE_BEG);
10082 parser->command_start = true;
10083 parser->current.type = PM_TOKEN_NEWLINE;
10084 if (!lexed_comment) parser_lex_callback(parser);
10085 return;
10086 }
10087
10088 // ,
10089 case ',':
10090 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10091 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10092 }
10093
10094 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10095 LEX(PM_TOKEN_COMMA);
10096
10097 // (
10098 case '(': {
10099 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10100
10101 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10102 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10103 }
10104
10105 parser->enclosure_nesting++;
10106 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10107 pm_do_loop_stack_push(parser, false);
10108 LEX(type);
10109 }
10110
10111 // )
10112 case ')':
10113 parser->enclosure_nesting--;
10114 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10115 pm_do_loop_stack_pop(parser);
10116 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10117
10118 // ;
10119 case ';':
10120 lex_state_set(parser, PM_LEX_STATE_BEG);
10121 parser->command_start = true;
10122 LEX(PM_TOKEN_SEMICOLON);
10123
10124 // [ [] []=
10125 case '[':
10126 parser->enclosure_nesting++;
10127 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10128
10129 if (lex_state_operator_p(parser)) {
10130 if (match(parser, ']')) {
10131 parser->enclosure_nesting--;
10132 lex_state_set(parser, PM_LEX_STATE_ARG);
10133 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10134 }
10135
10136 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10137 LEX(type);
10138 }
10139
10140 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10141 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10142 }
10143
10144 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10145 pm_do_loop_stack_push(parser, false);
10146 LEX(type);
10147
10148 // ]
10149 case ']':
10150 parser->enclosure_nesting--;
10151 lex_state_set(parser, PM_LEX_STATE_END);
10152 pm_do_loop_stack_pop(parser);
10153 LEX(PM_TOKEN_BRACKET_RIGHT);
10154
10155 // {
10156 case '{': {
10157 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10158
10159 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10160 // This { begins a lambda
10161 parser->command_start = true;
10162 lex_state_set(parser, PM_LEX_STATE_BEG);
10163 type = PM_TOKEN_LAMBDA_BEGIN;
10164 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10165 // This { begins a hash literal
10166 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10167 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10168 // This { begins a block
10169 parser->command_start = true;
10170 lex_state_set(parser, PM_LEX_STATE_BEG);
10171 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10172 // This { begins a block on a command
10173 parser->command_start = true;
10174 lex_state_set(parser, PM_LEX_STATE_BEG);
10175 } else {
10176 // This { begins a hash literal
10177 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10178 }
10179
10180 parser->enclosure_nesting++;
10181 parser->brace_nesting++;
10182 pm_do_loop_stack_push(parser, false);
10183
10184 LEX(type);
10185 }
10186
10187 // }
10188 case '}':
10189 parser->enclosure_nesting--;
10190 pm_do_loop_stack_pop(parser);
10191
10192 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10193 lex_mode_pop(parser);
10194 LEX(PM_TOKEN_EMBEXPR_END);
10195 }
10196
10197 parser->brace_nesting--;
10198 lex_state_set(parser, PM_LEX_STATE_END);
10199 LEX(PM_TOKEN_BRACE_RIGHT);
10200
10201 // * ** **= *=
10202 case '*': {
10203 if (match(parser, '*')) {
10204 if (match(parser, '=')) {
10205 lex_state_set(parser, PM_LEX_STATE_BEG);
10206 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10207 }
10208
10209 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10210
10211 if (lex_state_spcarg_p(parser, space_seen)) {
10212 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10213 type = PM_TOKEN_USTAR_STAR;
10214 } else if (lex_state_beg_p(parser)) {
10215 type = PM_TOKEN_USTAR_STAR;
10216 } else if (ambiguous_operator_p(parser, space_seen)) {
10217 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10218 }
10219
10220 if (lex_state_operator_p(parser)) {
10221 lex_state_set(parser, PM_LEX_STATE_ARG);
10222 } else {
10223 lex_state_set(parser, PM_LEX_STATE_BEG);
10224 }
10225
10226 LEX(type);
10227 }
10228
10229 if (match(parser, '=')) {
10230 lex_state_set(parser, PM_LEX_STATE_BEG);
10231 LEX(PM_TOKEN_STAR_EQUAL);
10232 }
10233
10234 pm_token_type_t type = PM_TOKEN_STAR;
10235
10236 if (lex_state_spcarg_p(parser, space_seen)) {
10237 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10238 type = PM_TOKEN_USTAR;
10239 } else if (lex_state_beg_p(parser)) {
10240 type = PM_TOKEN_USTAR;
10241 } else if (ambiguous_operator_p(parser, space_seen)) {
10242 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10243 }
10244
10245 if (lex_state_operator_p(parser)) {
10246 lex_state_set(parser, PM_LEX_STATE_ARG);
10247 } else {
10248 lex_state_set(parser, PM_LEX_STATE_BEG);
10249 }
10250
10251 LEX(type);
10252 }
10253
10254 // ! != !~ !@
10255 case '!':
10256 if (lex_state_operator_p(parser)) {
10257 lex_state_set(parser, PM_LEX_STATE_ARG);
10258 if (match(parser, '@')) {
10259 LEX(PM_TOKEN_BANG);
10260 }
10261 } else {
10262 lex_state_set(parser, PM_LEX_STATE_BEG);
10263 }
10264
10265 if (match(parser, '=')) {
10266 LEX(PM_TOKEN_BANG_EQUAL);
10267 }
10268
10269 if (match(parser, '~')) {
10270 LEX(PM_TOKEN_BANG_TILDE);
10271 }
10272
10273 LEX(PM_TOKEN_BANG);
10274
10275 // = => =~ == === =begin
10276 case '=':
10277 if (
10278 current_token_starts_line(parser) &&
10279 (parser->current.end + 5 <= parser->end) &&
10280 memcmp(parser->current.end, "begin", 5) == 0 &&
10281 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10282 ) {
10283 pm_token_type_t type = lex_embdoc(parser);
10284 if (type == PM_TOKEN_EOF) {
10285 LEX(type);
10286 }
10287
10288 goto lex_next_token;
10289 }
10290
10291 if (lex_state_operator_p(parser)) {
10292 lex_state_set(parser, PM_LEX_STATE_ARG);
10293 } else {
10294 lex_state_set(parser, PM_LEX_STATE_BEG);
10295 }
10296
10297 if (match(parser, '>')) {
10298 LEX(PM_TOKEN_EQUAL_GREATER);
10299 }
10300
10301 if (match(parser, '~')) {
10302 LEX(PM_TOKEN_EQUAL_TILDE);
10303 }
10304
10305 if (match(parser, '=')) {
10306 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10307 }
10308
10309 LEX(PM_TOKEN_EQUAL);
10310
10311 // < << <<= <= <=>
10312 case '<':
10313 if (match(parser, '<')) {
10314 if (
10315 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10316 !lex_state_end_p(parser) &&
10317 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10318 ) {
10319 const uint8_t *end = parser->current.end;
10320
10321 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10322 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10323
10324 if (match(parser, '-')) {
10325 indent = PM_HEREDOC_INDENT_DASH;
10326 }
10327 else if (match(parser, '~')) {
10328 indent = PM_HEREDOC_INDENT_TILDE;
10329 }
10330
10331 if (match(parser, '`')) {
10332 quote = PM_HEREDOC_QUOTE_BACKTICK;
10333 }
10334 else if (match(parser, '"')) {
10335 quote = PM_HEREDOC_QUOTE_DOUBLE;
10336 }
10337 else if (match(parser, '\'')) {
10338 quote = PM_HEREDOC_QUOTE_SINGLE;
10339 }
10340
10341 const uint8_t *ident_start = parser->current.end;
10342 size_t width = 0;
10343
10344 if (parser->current.end >= parser->end) {
10345 parser->current.end = end;
10346 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10347 parser->current.end = end;
10348 } else {
10349 if (quote == PM_HEREDOC_QUOTE_NONE) {
10350 parser->current.end += width;
10351
10352 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10353 parser->current.end += width;
10354 }
10355 } else {
10356 // If we have quotes, then we're going to go until we find the
10357 // end quote.
10358 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10359 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10360 parser->current.end++;
10361 }
10362 }
10363
10364 size_t ident_length = (size_t) (parser->current.end - ident_start);
10365 bool ident_error = false;
10366
10367 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10368 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10369 ident_error = true;
10370 }
10371
10372 parser->explicit_encoding = NULL;
10373 lex_mode_push(parser, (pm_lex_mode_t) {
10374 .mode = PM_LEX_HEREDOC,
10375 .as.heredoc = {
10376 .base = {
10377 .ident_start = ident_start,
10378 .ident_length = ident_length,
10379 .quote = quote,
10380 .indent = indent
10381 },
10382 .next_start = parser->current.end,
10383 .common_whitespace = NULL,
10384 .line_continuation = false
10385 }
10386 });
10387
10388 if (parser->heredoc_end == NULL) {
10389 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10390
10391 if (body_start == NULL) {
10392 // If there is no newline after the heredoc identifier, then
10393 // this is not a valid heredoc declaration. In this case we
10394 // will add an error, but we will still return a heredoc
10395 // start.
10396 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10397 body_start = parser->end;
10398 } else {
10399 // Otherwise, we want to indicate that the body of the
10400 // heredoc starts on the character after the next newline.
10401 pm_newline_list_append(&parser->newline_list, body_start);
10402 body_start++;
10403 }
10404
10405 parser->next_start = body_start;
10406 } else {
10407 parser->next_start = parser->heredoc_end;
10408 }
10409
10410 LEX(PM_TOKEN_HEREDOC_START);
10411 }
10412 }
10413
10414 if (match(parser, '=')) {
10415 lex_state_set(parser, PM_LEX_STATE_BEG);
10416 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10417 }
10418
10419 if (ambiguous_operator_p(parser, space_seen)) {
10420 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10421 }
10422
10423 if (lex_state_operator_p(parser)) {
10424 lex_state_set(parser, PM_LEX_STATE_ARG);
10425 } else {
10426 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10427 lex_state_set(parser, PM_LEX_STATE_BEG);
10428 }
10429
10430 LEX(PM_TOKEN_LESS_LESS);
10431 }
10432
10433 if (lex_state_operator_p(parser)) {
10434 lex_state_set(parser, PM_LEX_STATE_ARG);
10435 } else {
10436 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10437 lex_state_set(parser, PM_LEX_STATE_BEG);
10438 }
10439
10440 if (match(parser, '=')) {
10441 if (match(parser, '>')) {
10442 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10443 }
10444
10445 LEX(PM_TOKEN_LESS_EQUAL);
10446 }
10447
10448 LEX(PM_TOKEN_LESS);
10449
10450 // > >> >>= >=
10451 case '>':
10452 if (match(parser, '>')) {
10453 if (lex_state_operator_p(parser)) {
10454 lex_state_set(parser, PM_LEX_STATE_ARG);
10455 } else {
10456 lex_state_set(parser, PM_LEX_STATE_BEG);
10457 }
10458 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10459 }
10460
10461 if (lex_state_operator_p(parser)) {
10462 lex_state_set(parser, PM_LEX_STATE_ARG);
10463 } else {
10464 lex_state_set(parser, PM_LEX_STATE_BEG);
10465 }
10466
10467 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10468
10469 // double-quoted string literal
10470 case '"': {
10471 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10472 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10473 LEX(PM_TOKEN_STRING_BEGIN);
10474 }
10475
10476 // xstring literal
10477 case '`': {
10478 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10479 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10480 LEX(PM_TOKEN_BACKTICK);
10481 }
10482
10483 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10484 if (previous_command_start) {
10485 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10486 } else {
10487 lex_state_set(parser, PM_LEX_STATE_ARG);
10488 }
10489
10490 LEX(PM_TOKEN_BACKTICK);
10491 }
10492
10493 lex_mode_push_string(parser, true, false, '\0', '`');
10494 LEX(PM_TOKEN_BACKTICK);
10495 }
10496
10497 // single-quoted string literal
10498 case '\'': {
10499 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10500 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10501 LEX(PM_TOKEN_STRING_BEGIN);
10502 }
10503
10504 // ? character literal
10505 case '?':
10506 LEX(lex_question_mark(parser));
10507
10508 // & && &&= &=
10509 case '&': {
10510 if (match(parser, '&')) {
10511 lex_state_set(parser, PM_LEX_STATE_BEG);
10512
10513 if (match(parser, '=')) {
10514 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10515 }
10516
10517 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10518 }
10519
10520 if (match(parser, '=')) {
10521 lex_state_set(parser, PM_LEX_STATE_BEG);
10522 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10523 }
10524
10525 if (match(parser, '.')) {
10526 lex_state_set(parser, PM_LEX_STATE_DOT);
10527 LEX(PM_TOKEN_AMPERSAND_DOT);
10528 }
10529
10530 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10531 if (lex_state_spcarg_p(parser, space_seen)) {
10532 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10533 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10534 } else {
10535 const uint8_t delim = peek_offset(parser, 1);
10536
10537 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10538 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10539 }
10540 }
10541
10542 type = PM_TOKEN_UAMPERSAND;
10543 } else if (lex_state_beg_p(parser)) {
10544 type = PM_TOKEN_UAMPERSAND;
10545 } else if (ambiguous_operator_p(parser, space_seen)) {
10546 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10547 }
10548
10549 if (lex_state_operator_p(parser)) {
10550 lex_state_set(parser, PM_LEX_STATE_ARG);
10551 } else {
10552 lex_state_set(parser, PM_LEX_STATE_BEG);
10553 }
10554
10555 LEX(type);
10556 }
10557
10558 // | || ||= |=
10559 case '|':
10560 if (match(parser, '|')) {
10561 if (match(parser, '=')) {
10562 lex_state_set(parser, PM_LEX_STATE_BEG);
10563 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10564 }
10565
10566 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10567 parser->current.end--;
10568 LEX(PM_TOKEN_PIPE);
10569 }
10570
10571 lex_state_set(parser, PM_LEX_STATE_BEG);
10572 LEX(PM_TOKEN_PIPE_PIPE);
10573 }
10574
10575 if (match(parser, '=')) {
10576 lex_state_set(parser, PM_LEX_STATE_BEG);
10577 LEX(PM_TOKEN_PIPE_EQUAL);
10578 }
10579
10580 if (lex_state_operator_p(parser)) {
10581 lex_state_set(parser, PM_LEX_STATE_ARG);
10582 } else {
10583 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10584 }
10585
10586 LEX(PM_TOKEN_PIPE);
10587
10588 // + += +@
10589 case '+': {
10590 if (lex_state_operator_p(parser)) {
10591 lex_state_set(parser, PM_LEX_STATE_ARG);
10592
10593 if (match(parser, '@')) {
10594 LEX(PM_TOKEN_UPLUS);
10595 }
10596
10597 LEX(PM_TOKEN_PLUS);
10598 }
10599
10600 if (match(parser, '=')) {
10601 lex_state_set(parser, PM_LEX_STATE_BEG);
10602 LEX(PM_TOKEN_PLUS_EQUAL);
10603 }
10604
10605 if (
10606 lex_state_beg_p(parser) ||
10607 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10608 ) {
10609 lex_state_set(parser, PM_LEX_STATE_BEG);
10610
10611 if (pm_char_is_decimal_digit(peek(parser))) {
10612 parser->current.end++;
10613 pm_token_type_t type = lex_numeric(parser);
10614 lex_state_set(parser, PM_LEX_STATE_END);
10615 LEX(type);
10616 }
10617
10618 LEX(PM_TOKEN_UPLUS);
10619 }
10620
10621 if (ambiguous_operator_p(parser, space_seen)) {
10622 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10623 }
10624
10625 lex_state_set(parser, PM_LEX_STATE_BEG);
10626 LEX(PM_TOKEN_PLUS);
10627 }
10628
10629 // - -= -@
10630 case '-': {
10631 if (lex_state_operator_p(parser)) {
10632 lex_state_set(parser, PM_LEX_STATE_ARG);
10633
10634 if (match(parser, '@')) {
10635 LEX(PM_TOKEN_UMINUS);
10636 }
10637
10638 LEX(PM_TOKEN_MINUS);
10639 }
10640
10641 if (match(parser, '=')) {
10642 lex_state_set(parser, PM_LEX_STATE_BEG);
10643 LEX(PM_TOKEN_MINUS_EQUAL);
10644 }
10645
10646 if (match(parser, '>')) {
10647 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10648 LEX(PM_TOKEN_MINUS_GREATER);
10649 }
10650
10651 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10652 bool is_beg = lex_state_beg_p(parser);
10653 if (!is_beg && spcarg) {
10654 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10655 }
10656
10657 if (is_beg || spcarg) {
10658 lex_state_set(parser, PM_LEX_STATE_BEG);
10659 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10660 }
10661
10662 if (ambiguous_operator_p(parser, space_seen)) {
10663 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10664 }
10665
10666 lex_state_set(parser, PM_LEX_STATE_BEG);
10667 LEX(PM_TOKEN_MINUS);
10668 }
10669
10670 // . .. ...
10671 case '.': {
10672 bool beg_p = lex_state_beg_p(parser);
10673
10674 if (match(parser, '.')) {
10675 if (match(parser, '.')) {
10676 // If we're _not_ inside a range within default parameters
10677 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10678 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10679 lex_state_set(parser, PM_LEX_STATE_BEG);
10680 } else {
10681 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10682 }
10683 LEX(PM_TOKEN_UDOT_DOT_DOT);
10684 }
10685
10686 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10687 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10688 }
10689
10690 lex_state_set(parser, PM_LEX_STATE_BEG);
10691 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10692 }
10693
10694 lex_state_set(parser, PM_LEX_STATE_BEG);
10695 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10696 }
10697
10698 lex_state_set(parser, PM_LEX_STATE_DOT);
10699 LEX(PM_TOKEN_DOT);
10700 }
10701
10702 // integer
10703 case '0':
10704 case '1':
10705 case '2':
10706 case '3':
10707 case '4':
10708 case '5':
10709 case '6':
10710 case '7':
10711 case '8':
10712 case '9': {
10713 pm_token_type_t type = lex_numeric(parser);
10714 lex_state_set(parser, PM_LEX_STATE_END);
10715 LEX(type);
10716 }
10717
10718 // :: symbol
10719 case ':':
10720 if (match(parser, ':')) {
10721 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10722 lex_state_set(parser, PM_LEX_STATE_BEG);
10723 LEX(PM_TOKEN_UCOLON_COLON);
10724 }
10725
10726 lex_state_set(parser, PM_LEX_STATE_DOT);
10727 LEX(PM_TOKEN_COLON_COLON);
10728 }
10729
10730 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10731 lex_state_set(parser, PM_LEX_STATE_BEG);
10732 LEX(PM_TOKEN_COLON);
10733 }
10734
10735 if (peek(parser) == '"' || peek(parser) == '\'') {
10736 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10737 parser->current.end++;
10738 }
10739
10740 lex_state_set(parser, PM_LEX_STATE_FNAME);
10741 LEX(PM_TOKEN_SYMBOL_BEGIN);
10742
10743 // / /=
10744 case '/':
10745 if (lex_state_beg_p(parser)) {
10746 lex_mode_push_regexp(parser, '\0', '/');
10747 LEX(PM_TOKEN_REGEXP_BEGIN);
10748 }
10749
10750 if (match(parser, '=')) {
10751 lex_state_set(parser, PM_LEX_STATE_BEG);
10752 LEX(PM_TOKEN_SLASH_EQUAL);
10753 }
10754
10755 if (lex_state_spcarg_p(parser, space_seen)) {
10756 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10757 lex_mode_push_regexp(parser, '\0', '/');
10758 LEX(PM_TOKEN_REGEXP_BEGIN);
10759 }
10760
10761 if (ambiguous_operator_p(parser, space_seen)) {
10762 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10763 }
10764
10765 if (lex_state_operator_p(parser)) {
10766 lex_state_set(parser, PM_LEX_STATE_ARG);
10767 } else {
10768 lex_state_set(parser, PM_LEX_STATE_BEG);
10769 }
10770
10771 LEX(PM_TOKEN_SLASH);
10772
10773 // ^ ^=
10774 case '^':
10775 if (lex_state_operator_p(parser)) {
10776 lex_state_set(parser, PM_LEX_STATE_ARG);
10777 } else {
10778 lex_state_set(parser, PM_LEX_STATE_BEG);
10779 }
10780 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10781
10782 // ~ ~@
10783 case '~':
10784 if (lex_state_operator_p(parser)) {
10785 (void) match(parser, '@');
10786 lex_state_set(parser, PM_LEX_STATE_ARG);
10787 } else {
10788 lex_state_set(parser, PM_LEX_STATE_BEG);
10789 }
10790
10791 LEX(PM_TOKEN_TILDE);
10792
10793 // % %= %i %I %q %Q %w %W
10794 case '%': {
10795 // If there is no subsequent character then we have an
10796 // invalid token. We're going to say it's the percent
10797 // operator because we don't want to move into the string
10798 // lex mode unnecessarily.
10799 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10800 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10801 LEX(PM_TOKEN_PERCENT);
10802 }
10803
10804 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10805 lex_state_set(parser, PM_LEX_STATE_BEG);
10806 LEX(PM_TOKEN_PERCENT_EQUAL);
10807 } else if (
10808 lex_state_beg_p(parser) ||
10809 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10810 lex_state_spcarg_p(parser, space_seen)
10811 ) {
10812 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10813 if (*parser->current.end >= 0x80) {
10814 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10815 }
10816
10817 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10818 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10819 LEX(PM_TOKEN_STRING_BEGIN);
10820 }
10821
10822 // Delimiters for %-literals cannot be alphanumeric. We
10823 // validate that here.
10824 uint8_t delimiter = peek_offset(parser, 1);
10825 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10826 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10827 goto lex_next_token;
10828 }
10829
10830 switch (peek(parser)) {
10831 case 'i': {
10832 parser->current.end++;
10833
10834 if (parser->current.end < parser->end) {
10835 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10836 } else {
10837 lex_mode_push_list_eof(parser);
10838 }
10839
10840 LEX(PM_TOKEN_PERCENT_LOWER_I);
10841 }
10842 case 'I': {
10843 parser->current.end++;
10844
10845 if (parser->current.end < parser->end) {
10846 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10847 } else {
10848 lex_mode_push_list_eof(parser);
10849 }
10850
10851 LEX(PM_TOKEN_PERCENT_UPPER_I);
10852 }
10853 case 'r': {
10854 parser->current.end++;
10855
10856 if (parser->current.end < parser->end) {
10857 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10858 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10859 } else {
10860 lex_mode_push_regexp(parser, '\0', '\0');
10861 }
10862
10863 LEX(PM_TOKEN_REGEXP_BEGIN);
10864 }
10865 case 'q': {
10866 parser->current.end++;
10867
10868 if (parser->current.end < parser->end) {
10869 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10870 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10871 } else {
10872 lex_mode_push_string_eof(parser);
10873 }
10874
10875 LEX(PM_TOKEN_STRING_BEGIN);
10876 }
10877 case 'Q': {
10878 parser->current.end++;
10879
10880 if (parser->current.end < parser->end) {
10881 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10882 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10883 } else {
10884 lex_mode_push_string_eof(parser);
10885 }
10886
10887 LEX(PM_TOKEN_STRING_BEGIN);
10888 }
10889 case 's': {
10890 parser->current.end++;
10891
10892 if (parser->current.end < parser->end) {
10893 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10894 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10895 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10896 } else {
10897 lex_mode_push_string_eof(parser);
10898 }
10899
10900 LEX(PM_TOKEN_SYMBOL_BEGIN);
10901 }
10902 case 'w': {
10903 parser->current.end++;
10904
10905 if (parser->current.end < parser->end) {
10906 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10907 } else {
10908 lex_mode_push_list_eof(parser);
10909 }
10910
10911 LEX(PM_TOKEN_PERCENT_LOWER_W);
10912 }
10913 case 'W': {
10914 parser->current.end++;
10915
10916 if (parser->current.end < parser->end) {
10917 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10918 } else {
10919 lex_mode_push_list_eof(parser);
10920 }
10921
10922 LEX(PM_TOKEN_PERCENT_UPPER_W);
10923 }
10924 case 'x': {
10925 parser->current.end++;
10926
10927 if (parser->current.end < parser->end) {
10928 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10929 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10930 } else {
10931 lex_mode_push_string_eof(parser);
10932 }
10933
10934 LEX(PM_TOKEN_PERCENT_LOWER_X);
10935 }
10936 default:
10937 // If we get to this point, then we have a % that is completely
10938 // unparsable. In this case we'll just drop it from the parser
10939 // and skip past it and hope that the next token is something
10940 // that we can parse.
10941 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10942 goto lex_next_token;
10943 }
10944 }
10945
10946 if (ambiguous_operator_p(parser, space_seen)) {
10947 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10948 }
10949
10950 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10951 LEX(PM_TOKEN_PERCENT);
10952 }
10953
10954 // global variable
10955 case '$': {
10956 pm_token_type_t type = lex_global_variable(parser);
10957
10958 // If we're lexing an embedded variable, then we need to pop back into
10959 // the parent lex context.
10960 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10961 lex_mode_pop(parser);
10962 }
10963
10964 lex_state_set(parser, PM_LEX_STATE_END);
10965 LEX(type);
10966 }
10967
10968 // instance variable, class variable
10969 case '@':
10970 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10971 LEX(lex_at_variable(parser));
10972
10973 default: {
10974 if (*parser->current.start != '_') {
10975 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10976
10977 // If this isn't the beginning of an identifier, then
10978 // it's an invalid token as we've exhausted all of the
10979 // other options. We'll skip past it and return the next
10980 // token after adding an appropriate error message.
10981 if (!width) {
10982 if (*parser->current.start >= 0x80) {
10983 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
10984 } else if (*parser->current.start == '\\') {
10985 switch (peek_at(parser, parser->current.start + 1)) {
10986 case ' ':
10987 parser->current.end++;
10988 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
10989 break;
10990 case '\f':
10991 parser->current.end++;
10992 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
10993 break;
10994 case '\t':
10995 parser->current.end++;
10996 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
10997 break;
10998 case '\v':
10999 parser->current.end++;
11000 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11001 break;
11002 case '\r':
11003 if (peek_at(parser, parser->current.start + 2) != '\n') {
11004 parser->current.end++;
11005 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11006 break;
11007 }
11009 default:
11010 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11011 break;
11012 }
11013 } else if (char_is_ascii_printable(*parser->current.start)) {
11014 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11015 } else {
11016 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11017 }
11018
11019 goto lex_next_token;
11020 }
11021
11022 parser->current.end = parser->current.start + width;
11023 }
11024
11025 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11026
11027 // If we've hit a __END__ and it was at the start of the
11028 // line or the start of the file and it is followed by
11029 // either a \n or a \r\n, then this is the last token of the
11030 // file.
11031 if (
11032 ((parser->current.end - parser->current.start) == 7) &&
11033 current_token_starts_line(parser) &&
11034 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11035 (parser->current.end == parser->end || match_eol(parser))
11036 ) {
11037 // Since we know we're about to add an __END__ comment,
11038 // we know we need to add all of the newlines to get the
11039 // correct column information for it.
11040 const uint8_t *cursor = parser->current.end;
11041 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11042 pm_newline_list_append(&parser->newline_list, cursor++);
11043 }
11044
11045 parser->current.end = parser->end;
11046 parser->current.type = PM_TOKEN___END__;
11047 parser_lex_callback(parser);
11048
11049 parser->data_loc.start = parser->current.start;
11050 parser->data_loc.end = parser->current.end;
11051
11052 LEX(PM_TOKEN_EOF);
11053 }
11054
11055 pm_lex_state_t last_state = parser->lex_state;
11056
11057 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11058 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11059 if (previous_command_start) {
11060 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11061 } else {
11062 lex_state_set(parser, PM_LEX_STATE_ARG);
11063 }
11064 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11065 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11066 } else {
11067 lex_state_set(parser, PM_LEX_STATE_END);
11068 }
11069 }
11070
11071 if (
11072 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11073 (type == PM_TOKEN_IDENTIFIER) &&
11074 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11075 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11076 ) {
11077 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11078 }
11079
11080 LEX(type);
11081 }
11082 }
11083 }
11084 case PM_LEX_LIST: {
11085 if (parser->next_start != NULL) {
11086 parser->current.end = parser->next_start;
11087 parser->next_start = NULL;
11088 }
11089
11090 // First we'll set the beginning of the token.
11091 parser->current.start = parser->current.end;
11092
11093 // If there's any whitespace at the start of the list, then we're
11094 // going to trim it off the beginning and create a new token.
11095 size_t whitespace;
11096
11097 if (parser->heredoc_end) {
11098 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11099 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11100 whitespace += 1;
11101 }
11102 } else {
11103 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11104 }
11105
11106 if (whitespace > 0) {
11107 parser->current.end += whitespace;
11108 if (peek_offset(parser, -1) == '\n') {
11109 // mutates next_start
11110 parser_flush_heredoc_end(parser);
11111 }
11112 LEX(PM_TOKEN_WORDS_SEP);
11113 }
11114
11115 // We'll check if we're at the end of the file. If we are, then we
11116 // need to return the EOF token.
11117 if (parser->current.end >= parser->end) {
11118 LEX(PM_TOKEN_EOF);
11119 }
11120
11121 // Here we'll get a list of the places where strpbrk should break,
11122 // and then find the first one.
11123 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11124 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11125 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11126
11127 // If we haven't found an escape yet, then this buffer will be
11128 // unallocated since we can refer directly to the source string.
11129 pm_token_buffer_t token_buffer = { 0 };
11130
11131 while (breakpoint != NULL) {
11132 // If we hit whitespace, then we must have received content by
11133 // now, so we can return an element of the list.
11134 if (pm_char_is_whitespace(*breakpoint)) {
11135 parser->current.end = breakpoint;
11136 pm_token_buffer_flush(parser, &token_buffer);
11137 LEX(PM_TOKEN_STRING_CONTENT);
11138 }
11139
11140 // If we hit the terminator, we need to check which token to
11141 // return.
11142 if (*breakpoint == lex_mode->as.list.terminator) {
11143 // If this terminator doesn't actually close the list, then
11144 // we need to continue on past it.
11145 if (lex_mode->as.list.nesting > 0) {
11146 parser->current.end = breakpoint + 1;
11147 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11148 lex_mode->as.list.nesting--;
11149 continue;
11150 }
11151
11152 // If we've hit the terminator and we've already skipped
11153 // past content, then we can return a list node.
11154 if (breakpoint > parser->current.start) {
11155 parser->current.end = breakpoint;
11156 pm_token_buffer_flush(parser, &token_buffer);
11157 LEX(PM_TOKEN_STRING_CONTENT);
11158 }
11159
11160 // Otherwise, switch back to the default state and return
11161 // the end of the list.
11162 parser->current.end = breakpoint + 1;
11163 lex_mode_pop(parser);
11164 lex_state_set(parser, PM_LEX_STATE_END);
11165 LEX(PM_TOKEN_STRING_END);
11166 }
11167
11168 // If we hit a null byte, skip directly past it.
11169 if (*breakpoint == '\0') {
11170 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11171 continue;
11172 }
11173
11174 // If we hit escapes, then we need to treat the next token
11175 // literally. In this case we'll skip past the next character
11176 // and find the next breakpoint.
11177 if (*breakpoint == '\\') {
11178 parser->current.end = breakpoint + 1;
11179
11180 // If we've hit the end of the file, then break out of the
11181 // loop by setting the breakpoint to NULL.
11182 if (parser->current.end == parser->end) {
11183 breakpoint = NULL;
11184 continue;
11185 }
11186
11187 pm_token_buffer_escape(parser, &token_buffer);
11188 uint8_t peeked = peek(parser);
11189
11190 switch (peeked) {
11191 case ' ':
11192 case '\f':
11193 case '\t':
11194 case '\v':
11195 case '\\':
11196 pm_token_buffer_push_byte(&token_buffer, peeked);
11197 parser->current.end++;
11198 break;
11199 case '\r':
11200 parser->current.end++;
11201 if (peek(parser) != '\n') {
11202 pm_token_buffer_push_byte(&token_buffer, '\r');
11203 break;
11204 }
11206 case '\n':
11207 pm_token_buffer_push_byte(&token_buffer, '\n');
11208
11209 if (parser->heredoc_end) {
11210 // ... if we are on the same line as a heredoc,
11211 // flush the heredoc and continue parsing after
11212 // heredoc_end.
11213 parser_flush_heredoc_end(parser);
11214 pm_token_buffer_copy(parser, &token_buffer);
11215 LEX(PM_TOKEN_STRING_CONTENT);
11216 } else {
11217 // ... else track the newline.
11218 pm_newline_list_append(&parser->newline_list, parser->current.end);
11219 }
11220
11221 parser->current.end++;
11222 break;
11223 default:
11224 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11225 pm_token_buffer_push_byte(&token_buffer, peeked);
11226 parser->current.end++;
11227 } else if (lex_mode->as.list.interpolation) {
11228 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11229 } else {
11230 pm_token_buffer_push_byte(&token_buffer, '\\');
11231 pm_token_buffer_push_escaped(&token_buffer, parser);
11232 }
11233
11234 break;
11235 }
11236
11237 token_buffer.cursor = parser->current.end;
11238 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11239 continue;
11240 }
11241
11242 // If we hit a #, then we will attempt to lex interpolation.
11243 if (*breakpoint == '#') {
11244 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11245
11246 if (type == PM_TOKEN_NOT_PROVIDED) {
11247 // If we haven't returned at this point then we had something
11248 // that looked like an interpolated class or instance variable
11249 // like "#@" but wasn't actually. In this case we'll just skip
11250 // to the next breakpoint.
11251 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11252 continue;
11253 }
11254
11255 if (type == PM_TOKEN_STRING_CONTENT) {
11256 pm_token_buffer_flush(parser, &token_buffer);
11257 }
11258
11259 LEX(type);
11260 }
11261
11262 // If we've hit the incrementor, then we need to skip past it
11263 // and find the next breakpoint.
11264 assert(*breakpoint == lex_mode->as.list.incrementor);
11265 parser->current.end = breakpoint + 1;
11266 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11267 lex_mode->as.list.nesting++;
11268 continue;
11269 }
11270
11271 if (parser->current.end > parser->current.start) {
11272 pm_token_buffer_flush(parser, &token_buffer);
11273 LEX(PM_TOKEN_STRING_CONTENT);
11274 }
11275
11276 // If we were unable to find a breakpoint, then this token hits the
11277 // end of the file.
11278 parser->current.end = parser->end;
11279 pm_token_buffer_flush(parser, &token_buffer);
11280 LEX(PM_TOKEN_STRING_CONTENT);
11281 }
11282 case PM_LEX_REGEXP: {
11283 // First, we'll set the start of this token to be the current end.
11284 if (parser->next_start == NULL) {
11285 parser->current.start = parser->current.end;
11286 } else {
11287 parser->current.start = parser->next_start;
11288 parser->current.end = parser->next_start;
11289 parser->next_start = NULL;
11290 }
11291
11292 // We'll check if we're at the end of the file. If we are, then we
11293 // need to return the EOF token.
11294 if (parser->current.end >= parser->end) {
11295 LEX(PM_TOKEN_EOF);
11296 }
11297
11298 // Get a reference to the current mode.
11299 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11300
11301 // These are the places where we need to split up the content of the
11302 // regular expression. We'll use strpbrk to find the first of these
11303 // characters.
11304 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11305 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11306 pm_regexp_token_buffer_t token_buffer = { 0 };
11307
11308 while (breakpoint != NULL) {
11309 uint8_t term = lex_mode->as.regexp.terminator;
11310 bool is_terminator = (*breakpoint == term);
11311
11312 // If the terminator is newline, we need to consider \r\n _also_ a newline
11313 // For example: `%r\nfoo\r\n`
11314 // The regexp should be /foo/, not /foo\r/
11315 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11316 if (term == '\n') {
11317 is_terminator = true;
11318 }
11319
11320 // If the terminator is a CR, but we see a CRLF, we need to
11321 // treat the CRLF as a newline, meaning this is _not_ the
11322 // terminator
11323 if (term == '\r') {
11324 is_terminator = false;
11325 }
11326 }
11327
11328 // If we hit the terminator, we need to determine what kind of
11329 // token to return.
11330 if (is_terminator) {
11331 if (lex_mode->as.regexp.nesting > 0) {
11332 parser->current.end = breakpoint + 1;
11333 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11334 lex_mode->as.regexp.nesting--;
11335 continue;
11336 }
11337
11338 // Here we've hit the terminator. If we have already consumed
11339 // content then we need to return that content as string content
11340 // first.
11341 if (breakpoint > parser->current.start) {
11342 parser->current.end = breakpoint;
11343 pm_regexp_token_buffer_flush(parser, &token_buffer);
11344 LEX(PM_TOKEN_STRING_CONTENT);
11345 }
11346
11347 // Check here if we need to track the newline.
11348 size_t eol_length = match_eol_at(parser, breakpoint);
11349 if (eol_length) {
11350 parser->current.end = breakpoint + eol_length;
11351
11352 // Track the newline if we're not in a heredoc that
11353 // would already have added the newline to the
11354 // list.
11355 if (parser->heredoc_end == NULL) {
11356 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11357 }
11358 } else {
11359 parser->current.end = breakpoint + 1;
11360 }
11361
11362 // Since we've hit the terminator of the regular expression,
11363 // we now need to parse the options.
11364 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11365
11366 lex_mode_pop(parser);
11367 lex_state_set(parser, PM_LEX_STATE_END);
11368 LEX(PM_TOKEN_REGEXP_END);
11369 }
11370
11371 // If we've hit the incrementor, then we need to skip past it
11372 // and find the next breakpoint.
11373 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11374 parser->current.end = breakpoint + 1;
11375 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11376 lex_mode->as.regexp.nesting++;
11377 continue;
11378 }
11379
11380 switch (*breakpoint) {
11381 case '\0':
11382 // If we hit a null byte, skip directly past it.
11383 parser->current.end = breakpoint + 1;
11384 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11385 break;
11386 case '\r':
11387 if (peek_at(parser, breakpoint + 1) != '\n') {
11388 parser->current.end = breakpoint + 1;
11389 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11390 break;
11391 }
11392
11393 breakpoint++;
11394 parser->current.end = breakpoint;
11395 pm_regexp_token_buffer_escape(parser, &token_buffer);
11396 token_buffer.base.cursor = breakpoint;
11397
11399 case '\n':
11400 // If we've hit a newline, then we need to track that in
11401 // the list of newlines.
11402 if (parser->heredoc_end == NULL) {
11403 pm_newline_list_append(&parser->newline_list, breakpoint);
11404 parser->current.end = breakpoint + 1;
11405 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11406 break;
11407 }
11408
11409 parser->current.end = breakpoint + 1;
11410 parser_flush_heredoc_end(parser);
11411 pm_regexp_token_buffer_flush(parser, &token_buffer);
11412 LEX(PM_TOKEN_STRING_CONTENT);
11413 case '\\': {
11414 // If we hit escapes, then we need to treat the next
11415 // token literally. In this case we'll skip past the
11416 // next character and find the next breakpoint.
11417 parser->current.end = breakpoint + 1;
11418
11419 // If we've hit the end of the file, then break out of
11420 // the loop by setting the breakpoint to NULL.
11421 if (parser->current.end == parser->end) {
11422 breakpoint = NULL;
11423 break;
11424 }
11425
11426 pm_regexp_token_buffer_escape(parser, &token_buffer);
11427 uint8_t peeked = peek(parser);
11428
11429 switch (peeked) {
11430 case '\r':
11431 parser->current.end++;
11432 if (peek(parser) != '\n') {
11433 if (lex_mode->as.regexp.terminator != '\r') {
11434 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11435 }
11436 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11437 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11438 break;
11439 }
11441 case '\n':
11442 if (parser->heredoc_end) {
11443 // ... if we are on the same line as a heredoc,
11444 // flush the heredoc and continue parsing after
11445 // heredoc_end.
11446 parser_flush_heredoc_end(parser);
11447 pm_regexp_token_buffer_copy(parser, &token_buffer);
11448 LEX(PM_TOKEN_STRING_CONTENT);
11449 } else {
11450 // ... else track the newline.
11451 pm_newline_list_append(&parser->newline_list, parser->current.end);
11452 }
11453
11454 parser->current.end++;
11455 break;
11456 case 'c':
11457 case 'C':
11458 case 'M':
11459 case 'u':
11460 case 'x':
11461 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11462 break;
11463 default:
11464 if (lex_mode->as.regexp.terminator == peeked) {
11465 // Some characters when they are used as the
11466 // terminator also receive an escape. They are
11467 // enumerated here.
11468 switch (peeked) {
11469 case '$': case ')': case '*': case '+':
11470 case '.': case '>': case '?': case ']':
11471 case '^': case '|': case '}':
11472 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11473 break;
11474 default:
11475 break;
11476 }
11477
11478 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11479 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11480 parser->current.end++;
11481 break;
11482 }
11483
11484 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11485 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11486 break;
11487 }
11488
11489 token_buffer.base.cursor = parser->current.end;
11490 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11491 break;
11492 }
11493 case '#': {
11494 // If we hit a #, then we will attempt to lex
11495 // interpolation.
11496 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11497
11498 if (type == PM_TOKEN_NOT_PROVIDED) {
11499 // If we haven't returned at this point then we had
11500 // something that looked like an interpolated class or
11501 // instance variable like "#@" but wasn't actually. In
11502 // this case we'll just skip to the next breakpoint.
11503 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11504 break;
11505 }
11506
11507 if (type == PM_TOKEN_STRING_CONTENT) {
11508 pm_regexp_token_buffer_flush(parser, &token_buffer);
11509 }
11510
11511 LEX(type);
11512 }
11513 default:
11514 assert(false && "unreachable");
11515 break;
11516 }
11517 }
11518
11519 if (parser->current.end > parser->current.start) {
11520 pm_regexp_token_buffer_flush(parser, &token_buffer);
11521 LEX(PM_TOKEN_STRING_CONTENT);
11522 }
11523
11524 // If we were unable to find a breakpoint, then this token hits the
11525 // end of the file.
11526 parser->current.end = parser->end;
11527 pm_regexp_token_buffer_flush(parser, &token_buffer);
11528 LEX(PM_TOKEN_STRING_CONTENT);
11529 }
11530 case PM_LEX_STRING: {
11531 // First, we'll set the start of this token to be the current end.
11532 if (parser->next_start == NULL) {
11533 parser->current.start = parser->current.end;
11534 } else {
11535 parser->current.start = parser->next_start;
11536 parser->current.end = parser->next_start;
11537 parser->next_start = NULL;
11538 }
11539
11540 // We'll check if we're at the end of the file. If we are, then we need to
11541 // return the EOF token.
11542 if (parser->current.end >= parser->end) {
11543 LEX(PM_TOKEN_EOF);
11544 }
11545
11546 // These are the places where we need to split up the content of the
11547 // string. We'll use strpbrk to find the first of these characters.
11548 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11549 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11550 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11551
11552 // If we haven't found an escape yet, then this buffer will be
11553 // unallocated since we can refer directly to the source string.
11554 pm_token_buffer_t token_buffer = { 0 };
11555
11556 while (breakpoint != NULL) {
11557 // If we hit the incrementor, then we'll increment the nesting and
11558 // continue lexing.
11559 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11560 lex_mode->as.string.nesting++;
11561 parser->current.end = breakpoint + 1;
11562 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11563 continue;
11564 }
11565
11566 uint8_t term = lex_mode->as.string.terminator;
11567 bool is_terminator = (*breakpoint == term);
11568
11569 // If the terminator is newline, we need to consider \r\n _also_ a newline
11570 // For example: `%\nfoo\r\n`
11571 // The string should be "foo", not "foo\r"
11572 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11573 if (term == '\n') {
11574 is_terminator = true;
11575 }
11576
11577 // If the terminator is a CR, but we see a CRLF, we need to
11578 // treat the CRLF as a newline, meaning this is _not_ the
11579 // terminator
11580 if (term == '\r') {
11581 is_terminator = false;
11582 }
11583 }
11584
11585 // Note that we have to check the terminator here first because we could
11586 // potentially be parsing a % string that has a # character as the
11587 // terminator.
11588 if (is_terminator) {
11589 // If this terminator doesn't actually close the string, then we need
11590 // to continue on past it.
11591 if (lex_mode->as.string.nesting > 0) {
11592 parser->current.end = breakpoint + 1;
11593 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11594 lex_mode->as.string.nesting--;
11595 continue;
11596 }
11597
11598 // Here we've hit the terminator. If we have already consumed content
11599 // then we need to return that content as string content first.
11600 if (breakpoint > parser->current.start) {
11601 parser->current.end = breakpoint;
11602 pm_token_buffer_flush(parser, &token_buffer);
11603 LEX(PM_TOKEN_STRING_CONTENT);
11604 }
11605
11606 // Otherwise we need to switch back to the parent lex mode and
11607 // return the end of the string.
11608 size_t eol_length = match_eol_at(parser, breakpoint);
11609 if (eol_length) {
11610 parser->current.end = breakpoint + eol_length;
11611
11612 // Track the newline if we're not in a heredoc that
11613 // would already have added the newline to the
11614 // list.
11615 if (parser->heredoc_end == NULL) {
11616 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
11617 }
11618 } else {
11619 parser->current.end = breakpoint + 1;
11620 }
11621
11622 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11623 parser->current.end++;
11624 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11625 lex_mode_pop(parser);
11626 LEX(PM_TOKEN_LABEL_END);
11627 }
11628
11629 // When the delimiter itself is a newline, we won't
11630 // get a chance to flush heredocs in the usual places since
11631 // the newline is already consumed.
11632 if (term == '\n' && parser->heredoc_end) {
11633 parser_flush_heredoc_end(parser);
11634 }
11635
11636 lex_state_set(parser, PM_LEX_STATE_END);
11637 lex_mode_pop(parser);
11638 LEX(PM_TOKEN_STRING_END);
11639 }
11640
11641 switch (*breakpoint) {
11642 case '\0':
11643 // Skip directly past the null character.
11644 parser->current.end = breakpoint + 1;
11645 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11646 break;
11647 case '\r':
11648 if (peek_at(parser, breakpoint + 1) != '\n') {
11649 parser->current.end = breakpoint + 1;
11650 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11651 break;
11652 }
11653
11654 // If we hit a \r\n sequence, then we need to treat it
11655 // as a newline.
11656 breakpoint++;
11657 parser->current.end = breakpoint;
11658 pm_token_buffer_escape(parser, &token_buffer);
11659 token_buffer.cursor = breakpoint;
11660
11662 case '\n':
11663 // When we hit a newline, we need to flush any potential
11664 // heredocs. Note that this has to happen after we check
11665 // for the terminator in case the terminator is a
11666 // newline character.
11667 if (parser->heredoc_end == NULL) {
11668 pm_newline_list_append(&parser->newline_list, breakpoint);
11669 parser->current.end = breakpoint + 1;
11670 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11671 break;
11672 }
11673
11674 parser->current.end = breakpoint + 1;
11675 parser_flush_heredoc_end(parser);
11676 pm_token_buffer_flush(parser, &token_buffer);
11677 LEX(PM_TOKEN_STRING_CONTENT);
11678 case '\\': {
11679 // Here we hit escapes.
11680 parser->current.end = breakpoint + 1;
11681
11682 // If we've hit the end of the file, then break out of
11683 // the loop by setting the breakpoint to NULL.
11684 if (parser->current.end == parser->end) {
11685 breakpoint = NULL;
11686 continue;
11687 }
11688
11689 pm_token_buffer_escape(parser, &token_buffer);
11690 uint8_t peeked = peek(parser);
11691
11692 switch (peeked) {
11693 case '\\':
11694 pm_token_buffer_push_byte(&token_buffer, '\\');
11695 parser->current.end++;
11696 break;
11697 case '\r':
11698 parser->current.end++;
11699 if (peek(parser) != '\n') {
11700 if (!lex_mode->as.string.interpolation) {
11701 pm_token_buffer_push_byte(&token_buffer, '\\');
11702 }
11703 pm_token_buffer_push_byte(&token_buffer, '\r');
11704 break;
11705 }
11707 case '\n':
11708 if (!lex_mode->as.string.interpolation) {
11709 pm_token_buffer_push_byte(&token_buffer, '\\');
11710 pm_token_buffer_push_byte(&token_buffer, '\n');
11711 }
11712
11713 if (parser->heredoc_end) {
11714 // ... if we are on the same line as a heredoc,
11715 // flush the heredoc and continue parsing after
11716 // heredoc_end.
11717 parser_flush_heredoc_end(parser);
11718 pm_token_buffer_copy(parser, &token_buffer);
11719 LEX(PM_TOKEN_STRING_CONTENT);
11720 } else {
11721 // ... else track the newline.
11722 pm_newline_list_append(&parser->newline_list, parser->current.end);
11723 }
11724
11725 parser->current.end++;
11726 break;
11727 default:
11728 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11729 pm_token_buffer_push_byte(&token_buffer, peeked);
11730 parser->current.end++;
11731 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11732 pm_token_buffer_push_byte(&token_buffer, peeked);
11733 parser->current.end++;
11734 } else if (lex_mode->as.string.interpolation) {
11735 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11736 } else {
11737 pm_token_buffer_push_byte(&token_buffer, '\\');
11738 pm_token_buffer_push_escaped(&token_buffer, parser);
11739 }
11740
11741 break;
11742 }
11743
11744 token_buffer.cursor = parser->current.end;
11745 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11746 break;
11747 }
11748 case '#': {
11749 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11750
11751 if (type == PM_TOKEN_NOT_PROVIDED) {
11752 // If we haven't returned at this point then we had something that
11753 // looked like an interpolated class or instance variable like "#@"
11754 // but wasn't actually. In this case we'll just skip to the next
11755 // breakpoint.
11756 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11757 break;
11758 }
11759
11760 if (type == PM_TOKEN_STRING_CONTENT) {
11761 pm_token_buffer_flush(parser, &token_buffer);
11762 }
11763
11764 LEX(type);
11765 }
11766 default:
11767 assert(false && "unreachable");
11768 }
11769 }
11770
11771 if (parser->current.end > parser->current.start) {
11772 pm_token_buffer_flush(parser, &token_buffer);
11773 LEX(PM_TOKEN_STRING_CONTENT);
11774 }
11775
11776 // If we've hit the end of the string, then this is an unterminated
11777 // string. In that case we'll return a string content token.
11778 parser->current.end = parser->end;
11779 pm_token_buffer_flush(parser, &token_buffer);
11780 LEX(PM_TOKEN_STRING_CONTENT);
11781 }
11782 case PM_LEX_HEREDOC: {
11783 // First, we'll set to start of this token.
11784 if (parser->next_start == NULL) {
11785 parser->current.start = parser->current.end;
11786 } else {
11787 parser->current.start = parser->next_start;
11788 parser->current.end = parser->next_start;
11789 parser->heredoc_end = NULL;
11790 parser->next_start = NULL;
11791 }
11792
11793 // Now let's grab the information about the identifier off of the
11794 // current lex mode.
11795 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11796 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11797
11798 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11799 lex_mode->as.heredoc.line_continuation = false;
11800
11801 // We'll check if we're at the end of the file. If we are, then we
11802 // will add an error (because we weren't able to find the
11803 // terminator) but still continue parsing so that content after the
11804 // declaration of the heredoc can be parsed.
11805 if (parser->current.end >= parser->end) {
11806 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11807 parser->next_start = lex_mode->as.heredoc.next_start;
11808 parser->heredoc_end = parser->current.end;
11809 lex_state_set(parser, PM_LEX_STATE_END);
11810 lex_mode_pop(parser);
11811 LEX(PM_TOKEN_HEREDOC_END);
11812 }
11813
11814 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11815 size_t ident_length = heredoc_lex_mode->ident_length;
11816
11817 // If we are immediately following a newline and we have hit the
11818 // terminator, then we need to return the ending of the heredoc.
11819 if (current_token_starts_line(parser)) {
11820 const uint8_t *start = parser->current.start;
11821
11822 if (!line_continuation && (start + ident_length <= parser->end)) {
11823 const uint8_t *newline = next_newline(start, parser->end - start);
11824 const uint8_t *ident_end = newline;
11825 const uint8_t *terminator_end = newline;
11826
11827 if (newline == NULL) {
11828 terminator_end = parser->end;
11829 ident_end = parser->end;
11830 } else {
11831 terminator_end++;
11832 if (newline[-1] == '\r') {
11833 ident_end--; // Remove \r
11834 }
11835 }
11836
11837 const uint8_t *terminator_start = ident_end - ident_length;
11838 const uint8_t *cursor = start;
11839
11840 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11841 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11842 cursor++;
11843 }
11844 }
11845
11846 if (
11847 (cursor == terminator_start) &&
11848 (memcmp(terminator_start, ident_start, ident_length) == 0)
11849 ) {
11850 if (newline != NULL) {
11851 pm_newline_list_append(&parser->newline_list, newline);
11852 }
11853
11854 parser->current.end = terminator_end;
11855 if (*lex_mode->as.heredoc.next_start == '\\') {
11856 parser->next_start = NULL;
11857 } else {
11858 parser->next_start = lex_mode->as.heredoc.next_start;
11859 parser->heredoc_end = parser->current.end;
11860 }
11861
11862 lex_state_set(parser, PM_LEX_STATE_END);
11863 lex_mode_pop(parser);
11864 LEX(PM_TOKEN_HEREDOC_END);
11865 }
11866 }
11867
11868 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11869 if (
11870 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11871 lex_mode->as.heredoc.common_whitespace != NULL &&
11872 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11873 peek_at(parser, start) != '\n'
11874 ) {
11875 *lex_mode->as.heredoc.common_whitespace = whitespace;
11876 }
11877 }
11878
11879 // Otherwise we'll be parsing string content. These are the places
11880 // where we need to split up the content of the heredoc. We'll use
11881 // strpbrk to find the first of these characters.
11882 uint8_t breakpoints[] = "\r\n\\#";
11883
11884 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11885 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11886 breakpoints[3] = '\0';
11887 }
11888
11889 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11890 pm_token_buffer_t token_buffer = { 0 };
11891 bool was_line_continuation = false;
11892
11893 while (breakpoint != NULL) {
11894 switch (*breakpoint) {
11895 case '\0':
11896 // Skip directly past the null character.
11897 parser->current.end = breakpoint + 1;
11898 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11899 break;
11900 case '\r':
11901 parser->current.end = breakpoint + 1;
11902
11903 if (peek_at(parser, breakpoint + 1) != '\n') {
11904 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11905 break;
11906 }
11907
11908 // If we hit a \r\n sequence, then we want to replace it
11909 // with a single \n character in the final string.
11910 breakpoint++;
11911 pm_token_buffer_escape(parser, &token_buffer);
11912 token_buffer.cursor = breakpoint;
11913
11915 case '\n': {
11916 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11917 parser_flush_heredoc_end(parser);
11918 parser->current.end = breakpoint + 1;
11919 pm_token_buffer_flush(parser, &token_buffer);
11920 LEX(PM_TOKEN_STRING_CONTENT);
11921 }
11922
11923 pm_newline_list_append(&parser->newline_list, breakpoint);
11924
11925 // If we have a - or ~ heredoc, then we can match after
11926 // some leading whitespace.
11927 const uint8_t *start = breakpoint + 1;
11928
11929 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11930 // We want to match the terminator starting from the end of the line in case
11931 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11932 const uint8_t *newline = next_newline(start, parser->end - start);
11933
11934 if (newline == NULL) {
11935 newline = parser->end;
11936 } else if (newline[-1] == '\r') {
11937 newline--; // Remove \r
11938 }
11939
11940 // Start of a possible terminator.
11941 const uint8_t *terminator_start = newline - ident_length;
11942
11943 // Cursor to check for the leading whitespace. We skip the
11944 // leading whitespace if we have a - or ~ heredoc.
11945 const uint8_t *cursor = start;
11946
11947 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11948 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11949 cursor++;
11950 }
11951 }
11952
11953 if (
11954 cursor == terminator_start &&
11955 (memcmp(terminator_start, ident_start, ident_length) == 0)
11956 ) {
11957 parser->current.end = breakpoint + 1;
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 }
11961 }
11962
11963 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11964
11965 // If we have hit a newline that is followed by a valid
11966 // terminator, then we need to return the content of the
11967 // heredoc here as string content. Then, the next time a
11968 // token is lexed, it will match again and return the
11969 // end of the heredoc.
11970 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11971 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11972 *lex_mode->as.heredoc.common_whitespace = whitespace;
11973 }
11974
11975 parser->current.end = breakpoint + 1;
11976 pm_token_buffer_flush(parser, &token_buffer);
11977 LEX(PM_TOKEN_STRING_CONTENT);
11978 }
11979
11980 // Otherwise we hit a newline and it wasn't followed by
11981 // a terminator, so we can continue parsing.
11982 parser->current.end = breakpoint + 1;
11983 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11984 break;
11985 }
11986 case '\\': {
11987 // If we hit an escape, then we need to skip past
11988 // however many characters the escape takes up. However
11989 // it's important that if \n or \r\n are escaped, we
11990 // stop looping before the newline and not after the
11991 // newline so that we can still potentially find the
11992 // terminator of the heredoc.
11993 parser->current.end = breakpoint + 1;
11994
11995 // If we've hit the end of the file, then break out of
11996 // the loop by setting the breakpoint to NULL.
11997 if (parser->current.end == parser->end) {
11998 breakpoint = NULL;
11999 continue;
12000 }
12001
12002 pm_token_buffer_escape(parser, &token_buffer);
12003 uint8_t peeked = peek(parser);
12004
12005 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12006 switch (peeked) {
12007 case '\r':
12008 parser->current.end++;
12009 if (peek(parser) != '\n') {
12010 pm_token_buffer_push_byte(&token_buffer, '\\');
12011 pm_token_buffer_push_byte(&token_buffer, '\r');
12012 break;
12013 }
12015 case '\n':
12016 pm_token_buffer_push_byte(&token_buffer, '\\');
12017 pm_token_buffer_push_byte(&token_buffer, '\n');
12018 token_buffer.cursor = parser->current.end + 1;
12019 breakpoint = parser->current.end;
12020 continue;
12021 default:
12022 pm_token_buffer_push_byte(&token_buffer, '\\');
12023 pm_token_buffer_push_escaped(&token_buffer, parser);
12024 break;
12025 }
12026 } else {
12027 switch (peeked) {
12028 case '\r':
12029 parser->current.end++;
12030 if (peek(parser) != '\n') {
12031 pm_token_buffer_push_byte(&token_buffer, '\r');
12032 break;
12033 }
12035 case '\n':
12036 // If we are in a tilde here, we should
12037 // break out of the loop and return the
12038 // string content.
12039 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12040 const uint8_t *end = parser->current.end;
12041
12042 if (parser->heredoc_end == NULL) {
12043 pm_newline_list_append(&parser->newline_list, end);
12044 }
12045
12046 // Here we want the buffer to only
12047 // include up to the backslash.
12048 parser->current.end = breakpoint;
12049 pm_token_buffer_flush(parser, &token_buffer);
12050
12051 // Now we can advance the end of the
12052 // token past the newline.
12053 parser->current.end = end + 1;
12054 lex_mode->as.heredoc.line_continuation = true;
12055 LEX(PM_TOKEN_STRING_CONTENT);
12056 }
12057
12058 was_line_continuation = true;
12059 token_buffer.cursor = parser->current.end + 1;
12060 breakpoint = parser->current.end;
12061 continue;
12062 default:
12063 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12064 break;
12065 }
12066 }
12067
12068 token_buffer.cursor = parser->current.end;
12069 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12070 break;
12071 }
12072 case '#': {
12073 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12074
12075 if (type == PM_TOKEN_NOT_PROVIDED) {
12076 // If we haven't returned at this point then we had
12077 // something that looked like an interpolated class
12078 // or instance variable like "#@" but wasn't
12079 // actually. In this case we'll just skip to the
12080 // next breakpoint.
12081 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12082 break;
12083 }
12084
12085 if (type == PM_TOKEN_STRING_CONTENT) {
12086 pm_token_buffer_flush(parser, &token_buffer);
12087 }
12088
12089 LEX(type);
12090 }
12091 default:
12092 assert(false && "unreachable");
12093 }
12094
12095 was_line_continuation = false;
12096 }
12097
12098 if (parser->current.end > parser->current.start) {
12099 parser->current.end = parser->end;
12100 pm_token_buffer_flush(parser, &token_buffer);
12101 LEX(PM_TOKEN_STRING_CONTENT);
12102 }
12103
12104 // If we've hit the end of the string, then this is an unterminated
12105 // heredoc. In that case we'll return a string content token.
12106 parser->current.end = parser->end;
12107 pm_token_buffer_flush(parser, &token_buffer);
12108 LEX(PM_TOKEN_STRING_CONTENT);
12109 }
12110 }
12111
12112 assert(false && "unreachable");
12113}
12114
12115#undef LEX
12116
12117/******************************************************************************/
12118/* Parse functions */
12119/******************************************************************************/
12120
/**
 * The binding powers that can be assigned to a token, listed from the
 * loosest-binding construct to the tightest. Only even values are named; the
 * table initializer macros derive a right binding power by adding one to a
 * named value, so the odd values in between are implicitly in use as well.
 */
typedef enum {
    /** Marks a token that cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,

    /** Statement level. */
    PM_BINDING_POWER_STATEMENT = 2,

    /** rescue (modifier form) */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    /** if unless until while (modifier forms) */
    PM_BINDING_POWER_MODIFIER = 6,

    /** and or */
    PM_BINDING_POWER_COMPOSITION = 8,

    /** not */
    PM_BINDING_POWER_NOT = 10,

    /** => in */
    PM_BINDING_POWER_MATCH = 12,

    /** defined? */
    PM_BINDING_POWER_DEFINED = 14,

    /** = (multiple assignment) */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,

    /** ?: */
    PM_BINDING_POWER_TERNARY = 20,

    /** .. ... */
    PM_BINDING_POWER_RANGE = 22,

    /** || */
    PM_BINDING_POWER_LOGICAL_OR = 24,

    /** && */
    PM_BINDING_POWER_LOGICAL_AND = 26,

    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,

    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,

    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,

    /** & */
    PM_BINDING_POWER_BITWISE_AND = 34,

    /** << >> */
    PM_BINDING_POWER_SHIFT = 36,

    /** + - */
    PM_BINDING_POWER_TERM = 38,

    /** * / % */
    PM_BINDING_POWER_FACTOR = 40,

    /** -@ */
    PM_BINDING_POWER_UMINUS = 42,

    /** ** */
    PM_BINDING_POWER_EXPONENT = 44,

    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,

    /** [] []= */
    PM_BINDING_POWER_INDEX = 48,

    /** :: . */
    PM_BINDING_POWER_CALL = 50,

    /** Upper bound; greater than every other binding power. */
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12158
12163typedef struct {
12165 pm_binding_power_t left;
12166
12168 pm_binding_power_t right;
12169
12172
12179
/*
 * Initializer shorthands for the rows of the pm_binding_powers table. Each one
 * expands to a positional initializer: left power, right power, and two
 * boolean flags. The precedence argument is parenthesized in every expansion
 * so that an argument which is itself an expression cannot be regrouped by the
 * "+ 1" that follows it.
 */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12185
12186pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12187 // rescue
12188 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12189
12190 // if unless until while
12191 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12192 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12193 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12194 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12195
12196 // and or
12197 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12198 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12199
12200 // => in
12201 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12202 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12203
12204 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12205 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12206 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12207 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12208 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12209 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12210 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12211 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12212 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12213 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12214 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12215 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12216 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12217 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12218 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12219
12220 // ?:
12221 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12222
12223 // .. ...
12224 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12225 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12226 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12227 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12228
12229 // ||
12230 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12231
12232 // &&
12233 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12234
12235 // != !~ == === =~ <=>
12236 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12237 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12238 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12239 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12240 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12241 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12242
12243 // > >= < <=
12244 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12245 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12246 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12247 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12248
12249 // ^ |
12250 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12251 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12252
12253 // &
12254 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12255
12256 // >> <<
12257 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12258 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12259
12260 // - +
12261 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12262 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12263
12264 // % / *
12265 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12266 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12267 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12268 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12269
12270 // -@
12271 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12272 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12273
12274 // **
12275 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12276 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12277
12278 // ! ~ +@
12279 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12280 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12281 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12282
12283 // [
12284 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12285
12286 // :: . &.
12287 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12288 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12289 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12290};
12291
// The initializer shorthands are only meant for the binding powers table, so
// every one of them is removed again here, including NON_ASSOCIATIVE.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
12296
12300static inline bool
12301match1(const pm_parser_t *parser, pm_token_type_t type) {
12302 return parser->current.type == type;
12303}
12304
12308static inline bool
12309match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12310 return match1(parser, type1) || match1(parser, type2);
12311}
12312
12316static inline bool
12317match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12318 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12319}
12320
12324static inline bool
12325match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12326 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12327}
12328
12332static inline bool
12333match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12334 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12335}
12336
12340static inline bool
12341match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12342 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12343}
12344
12351static bool
12352accept1(pm_parser_t *parser, pm_token_type_t type) {
12353 if (match1(parser, type)) {
12354 parser_lex(parser);
12355 return true;
12356 }
12357 return false;
12358}
12359
12364static inline bool
12365accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12366 if (match2(parser, type1, type2)) {
12367 parser_lex(parser);
12368 return true;
12369 }
12370 return false;
12371}
12372
12384static void
12385expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12386 if (accept1(parser, type)) return;
12387
12388 const uint8_t *location = parser->previous.end;
12389 pm_parser_err(parser, location, location, diag_id);
12390
12391 parser->previous.start = location;
12392 parser->previous.type = PM_TOKEN_MISSING;
12393}
12394
12399static void
12400expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12401 if (accept2(parser, type1, type2)) return;
12402
12403 const uint8_t *location = parser->previous.end;
12404 pm_parser_err(parser, location, location, diag_id);
12405
12406 parser->previous.start = location;
12407 parser->previous.type = PM_TOKEN_MISSING;
12408}
12409
12414static void
12415expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12416 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12417 parser_lex(parser);
12418 } else {
12419 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12420 parser->previous.start = parser->previous.end;
12421 parser->previous.type = PM_TOKEN_MISSING;
12422 }
12423}
12424
12425static pm_node_t *
12426parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12427
12432static pm_node_t *
12433parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12434 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12435 pm_assert_value_expression(parser, node);
12436 return node;
12437}
12438
12457static inline bool
12458token_begins_expression_p(pm_token_type_t type) {
12459 switch (type) {
12460 case PM_TOKEN_EQUAL_GREATER:
12461 case PM_TOKEN_KEYWORD_IN:
12462 // We need to special case this because it is a binary operator that
12463 // should not be marked as beginning an expression.
12464 return false;
12465 case PM_TOKEN_BRACE_RIGHT:
12466 case PM_TOKEN_BRACKET_RIGHT:
12467 case PM_TOKEN_COLON:
12468 case PM_TOKEN_COMMA:
12469 case PM_TOKEN_EMBEXPR_END:
12470 case PM_TOKEN_EOF:
12471 case PM_TOKEN_LAMBDA_BEGIN:
12472 case PM_TOKEN_KEYWORD_DO:
12473 case PM_TOKEN_KEYWORD_DO_LOOP:
12474 case PM_TOKEN_KEYWORD_END:
12475 case PM_TOKEN_KEYWORD_ELSE:
12476 case PM_TOKEN_KEYWORD_ELSIF:
12477 case PM_TOKEN_KEYWORD_ENSURE:
12478 case PM_TOKEN_KEYWORD_THEN:
12479 case PM_TOKEN_KEYWORD_RESCUE:
12480 case PM_TOKEN_KEYWORD_WHEN:
12481 case PM_TOKEN_NEWLINE:
12482 case PM_TOKEN_PARENTHESIS_RIGHT:
12483 case PM_TOKEN_SEMICOLON:
12484 // The reason we need this short-circuit is because we're using the
12485 // binding powers table to tell us if the subsequent token could
12486 // potentially be the start of an expression. If there _is_ a binding
12487 // power for one of these tokens, then we should remove it from this list
12488 // and let it be handled by the default case below.
12489 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12490 return false;
12491 case PM_TOKEN_UAMPERSAND:
12492 // This is a special case because this unary operator cannot appear
12493 // as a general operator, it only appears in certain circumstances.
12494 return false;
12495 case PM_TOKEN_UCOLON_COLON:
12496 case PM_TOKEN_UMINUS:
12497 case PM_TOKEN_UMINUS_NUM:
12498 case PM_TOKEN_UPLUS:
12499 case PM_TOKEN_BANG:
12500 case PM_TOKEN_TILDE:
12501 case PM_TOKEN_UDOT_DOT:
12502 case PM_TOKEN_UDOT_DOT_DOT:
12503 // These unary tokens actually do have binding power associated with them
12504 // so that we can correctly place them into the precedence order. But we
12505 // want them to be marked as beginning an expression, so we need to
12506 // special case them here.
12507 return true;
12508 default:
12509 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12510 }
12511}
12512
12517static pm_node_t *
12518parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12519 if (accept1(parser, PM_TOKEN_USTAR)) {
12520 pm_token_t operator = parser->previous;
12521 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12522 return UP(pm_splat_node_create(parser, &operator, expression));
12523 }
12524
12525 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12526}
12527
12528static bool
12529pm_node_unreference_each(const pm_node_t *node, void *data) {
12530 switch (PM_NODE_TYPE(node)) {
12531 /* When we are about to destroy a set of nodes that could potentially
12532 * contain block exits for the current scope, we need to check if they
12533 * are contained in the list of block exits and remove them if they are.
12534 */
12535 case PM_BREAK_NODE:
12536 case PM_NEXT_NODE:
12537 case PM_REDO_NODE: {
12538 pm_parser_t *parser = (pm_parser_t *) data;
12539 size_t index = 0;
12540
12541 while (index < parser->current_block_exits->size) {
12542 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12543
12544 if (block_exit == node) {
12545 if (index + 1 < parser->current_block_exits->size) {
12546 memmove(
12547 &parser->current_block_exits->nodes[index],
12548 &parser->current_block_exits->nodes[index + 1],
12549 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12550 );
12551 }
12552 parser->current_block_exits->size--;
12553
12554 /* Note returning true here because these nodes could have
12555 * arguments that are themselves block exits. */
12556 return true;
12557 }
12558
12559 index++;
12560 }
12561
12562 return true;
12563 }
12564 /* When an implicit local variable is written to or targeted, it becomes
12565 * a regular, named local variable. This branch removes it from the list
12566 * of implicit parameters when that happens. */
12567 case PM_LOCAL_VARIABLE_READ_NODE:
12568 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12569 pm_parser_t *parser = (pm_parser_t *) data;
12570 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12571
12572 for (size_t index = 0; index < implicit_parameters->size; index++) {
12573 if (implicit_parameters->nodes[index] == node) {
12574 /* If the node is not the last one in the list, we need to
12575 * shift the remaining nodes down to fill the gap. This is
12576 * extremely unlikely to happen. */
12577 if (index != implicit_parameters->size - 1) {
12578 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12579 }
12580
12581 implicit_parameters->size--;
12582 break;
12583 }
12584 }
12585
12586 return false;
12587 }
12588 default:
12589 return true;
12590 }
12591}
12592
12598static void
12599pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12600 pm_visit_node(node, pm_node_unreference_each, parser);
12601}
12602
12607static void
12608parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12609 // The method name needs to change. If we previously had
12610 // foo, we now need foo=. In this case we'll allocate a new
12611 // owned string, copy the previous method name in, and
12612 // append an =.
12613 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12614 size_t length = constant->length;
12615 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12616 if (name == NULL) return;
12617
12618 memcpy(name, constant->start, length);
12619 name[length] = '=';
12620
12621 // Now switch the name to the new string.
12622 // This silences clang analyzer warning about leak of memory pointed by `name`.
12623 // NOLINTNEXTLINE(clang-analyzer-*)
12624 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12625}
12626
12633static pm_node_t *
12634parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12635 switch (PM_NODE_TYPE(target)) {
12636 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12637 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12638 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12639 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12640 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12641 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12642 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12643 default: break;
12644 }
12645
12646 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
12647 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12648
12649 pm_node_destroy(parser, target);
12650 return UP(result);
12651}
12652
12661static pm_node_t *
12662parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12663 switch (PM_NODE_TYPE(target)) {
12664 case PM_MISSING_NODE:
12665 return target;
12666 case PM_SOURCE_ENCODING_NODE:
12667 case PM_FALSE_NODE:
12668 case PM_SOURCE_FILE_NODE:
12669 case PM_SOURCE_LINE_NODE:
12670 case PM_NIL_NODE:
12671 case PM_SELF_NODE:
12672 case PM_TRUE_NODE: {
12673 // In these special cases, we have specific error messages and we
12674 // will replace them with local variable writes.
12675 return parse_unwriteable_target(parser, target);
12676 }
12677 case PM_CLASS_VARIABLE_READ_NODE:
12679 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12680 return target;
12681 case PM_CONSTANT_PATH_NODE:
12682 if (context_def_p(parser)) {
12683 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12684 }
12685
12687 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12688
12689 return target;
12690 case PM_CONSTANT_READ_NODE:
12691 if (context_def_p(parser)) {
12692 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12693 }
12694
12695 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12696 target->type = PM_CONSTANT_TARGET_NODE;
12697
12698 return target;
12699 case PM_BACK_REFERENCE_READ_NODE:
12700 case PM_NUMBERED_REFERENCE_READ_NODE:
12701 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12702 return target;
12703 case PM_GLOBAL_VARIABLE_READ_NODE:
12705 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12706 return target;
12707 case PM_LOCAL_VARIABLE_READ_NODE: {
12708 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12709 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
12710 pm_node_unreference(parser, target);
12711 }
12712
12713 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12714 uint32_t name = cast->name;
12715 uint32_t depth = cast->depth;
12716 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12717
12719 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12720
12721 return target;
12722 }
12723 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12724 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12725 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12726
12727 pm_node_unreference(parser, target);
12728 pm_node_destroy(parser, target);
12729
12730 return node;
12731 }
12732 case PM_INSTANCE_VARIABLE_READ_NODE:
12734 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12735 return target;
12736 case PM_MULTI_TARGET_NODE:
12737 if (splat_parent) {
12738 // Multi target is not accepted in all positions. If this is one
12739 // of them, then we need to add an error.
12740 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12741 }
12742
12743 return target;
12744 case PM_SPLAT_NODE: {
12745 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12746
12747 if (splat->expression != NULL) {
12748 splat->expression = parse_target(parser, splat->expression, multiple, true);
12749 }
12750
12751 return UP(splat);
12752 }
12753 case PM_CALL_NODE: {
12754 pm_call_node_t *call = (pm_call_node_t *) target;
12755
12756 // If we have no arguments to the call node and we need this to be a
12757 // target then this is either a method call or a local variable
12758 // write.
12759 if (
12760 (call->message_loc.start != NULL) &&
12761 (call->message_loc.end[-1] != '!') &&
12762 (call->message_loc.end[-1] != '?') &&
12763 (call->opening_loc.start == NULL) &&
12764 (call->arguments == NULL) &&
12765 (call->block == NULL)
12766 ) {
12767 if (call->receiver == NULL) {
12768 // When we get here, we have a local variable write, because it
12769 // was previously marked as a method call but now we have an =.
12770 // This looks like:
12771 //
12772 // foo = 1
12773 //
12774 // When it was parsed in the prefix position, foo was seen as a
12775 // method call with no receiver and no arguments. Now we have an
12776 // =, so we know it's a local variable write.
12777 const pm_location_t message_loc = call->message_loc;
12778
12779 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
12780 pm_node_destroy(parser, target);
12781
12782 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12783 }
12784
12785 if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
12786 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12787 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12788 }
12789
12790 parse_write_name(parser, &call->name);
12791 return UP(pm_call_target_node_create(parser, call));
12792 }
12793 }
12794
12795 // If there is no call operator and the message is "[]" then this is
12796 // an aref expression, and we can transform it into an aset
12797 // expression.
12798 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12799 return UP(pm_index_target_node_create(parser, call));
12800 }
12801 }
12803 default:
12804 // In this case we have a node that we don't know how to convert
12805 // into a target. We need to treat it as an error. For now, we'll
12806 // mark it as an error and just skip right past it.
12807 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12808 return target;
12809 }
12810}
12811
12816static pm_node_t *
12817parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12818 pm_node_t *result = parse_target(parser, target, multiple, false);
12819
12820 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12821 // parens after the targets.
12822 if (
12823 !match1(parser, PM_TOKEN_EQUAL) &&
12824 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12825 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12826 ) {
12827 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12828 }
12829
12830 return result;
12831}
12832
12837static pm_node_t *
12838parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12839 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12840
12841 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12842 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12843 }
12844
12845 return write;
12846}
12847
12851static pm_node_t *
12852parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12853 switch (PM_NODE_TYPE(target)) {
12854 case PM_MISSING_NODE:
12855 pm_node_destroy(parser, value);
12856 return target;
12857 case PM_CLASS_VARIABLE_READ_NODE: {
12858 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12859 pm_node_destroy(parser, target);
12860 return UP(node);
12861 }
12862 case PM_CONSTANT_PATH_NODE: {
12863 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12864
12865 if (context_def_p(parser)) {
12866 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12867 }
12868
12869 return parse_shareable_constant_write(parser, node);
12870 }
12871 case PM_CONSTANT_READ_NODE: {
12872 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12873
12874 if (context_def_p(parser)) {
12875 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12876 }
12877
12878 pm_node_destroy(parser, target);
12879 return parse_shareable_constant_write(parser, node);
12880 }
12881 case PM_BACK_REFERENCE_READ_NODE:
12882 case PM_NUMBERED_REFERENCE_READ_NODE:
12883 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12885 case PM_GLOBAL_VARIABLE_READ_NODE: {
12886 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12887 pm_node_destroy(parser, target);
12888 return UP(node);
12889 }
12890 case PM_LOCAL_VARIABLE_READ_NODE: {
12892
12893 pm_constant_id_t name = local_read->name;
12894 pm_location_t name_loc = target->location;
12895
12896 uint32_t depth = local_read->depth;
12897 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12898
12899 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
12900 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12901 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
12902 pm_node_unreference(parser, target);
12903 }
12904
12905 pm_locals_unread(&scope->locals, name);
12906 pm_node_destroy(parser, target);
12907
12908 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
12909 }
12910 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12911 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12912 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12913
12914 pm_node_unreference(parser, target);
12915 pm_node_destroy(parser, target);
12916
12917 return node;
12918 }
12919 case PM_INSTANCE_VARIABLE_READ_NODE: {
12920 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12921 pm_node_destroy(parser, target);
12922 return write_node;
12923 }
12924 case PM_MULTI_TARGET_NODE:
12925 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12926 case PM_SPLAT_NODE: {
12927 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12928
12929 if (splat->expression != NULL) {
12930 splat->expression = parse_write(parser, splat->expression, operator, value);
12931 }
12932
12933 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12934 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12935
12936 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12937 }
12938 case PM_CALL_NODE: {
12939 pm_call_node_t *call = (pm_call_node_t *) target;
12940
12941 // If we have no arguments to the call node and we need this to be a
12942 // target then this is either a method call or a local variable
12943 // write.
12944 if (
12945 (call->message_loc.start != NULL) &&
12946 (call->message_loc.end[-1] != '!') &&
12947 (call->message_loc.end[-1] != '?') &&
12948 (call->opening_loc.start == NULL) &&
12949 (call->arguments == NULL) &&
12950 (call->block == NULL)
12951 ) {
12952 if (call->receiver == NULL) {
12953 // When we get here, we have a local variable write, because it
12954 // was previously marked as a method call but now we have an =.
12955 // This looks like:
12956 //
12957 // foo = 1
12958 //
12959 // When it was parsed in the prefix position, foo was seen as a
12960 // method call with no receiver and no arguments. Now we have an
12961 // =, so we know it's a local variable write.
12962 const pm_location_t message = call->message_loc;
12963
12964 pm_parser_local_add_location(parser, message.start, message.end, 0);
12965 pm_node_destroy(parser, target);
12966
12967 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
12968 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
12969
12970 pm_refute_numbered_parameter(parser, message.start, message.end);
12971 return target;
12972 }
12973
12974 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
12975 // When we get here, we have a method call, because it was
12976 // previously marked as a method call but now we have an =. This
12977 // looks like:
12978 //
12979 // foo.bar = 1
12980 //
12981 // When it was parsed in the prefix position, foo.bar was seen as a
12982 // method call with no arguments. Now we have an =, so we know it's
12983 // a method call with an argument. In this case we will create the
12984 // arguments node, parse the argument, and add it to the list.
12985 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
12986 call->arguments = arguments;
12987
12988 pm_arguments_node_arguments_append(arguments, value);
12989 call->base.location.end = arguments->base.location.end;
12990 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
12991
12992 parse_write_name(parser, &call->name);
12993 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
12994
12995 return UP(call);
12996 }
12997 }
12998
12999 // If there is no call operator and the message is "[]" then this is
13000 // an aref expression, and we can transform it into an aset
13001 // expression.
13002 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13003 if (call->arguments == NULL) {
13004 call->arguments = pm_arguments_node_create(parser);
13005 }
13006
13007 pm_arguments_node_arguments_append(call->arguments, value);
13008 target->location.end = value->location.end;
13009
13010 // Replace the name with "[]=".
13011 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13012 call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
13013
13014 // Ensure that the arguments for []= don't contain keywords
13015 pm_index_arguments_check(parser, call->arguments, call->block);
13016 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13017
13018 return target;
13019 }
13020
13021 // If there are arguments on the call node, then it can't be a
13022 // method call ending with = or a local variable write, so it must
13023 // be a syntax error. In this case we'll fall through to our default
13024 // handling. We need to free the value that we parsed because there
13025 // is no way for us to attach it to the tree at this point.
13026 //
13027 // Since it is possible for the value to contain an implicit
13028 // parameter somewhere in its subtree, we need to walk it and remove
13029 // any implicit parameters from the list of implicit parameters for
13030 // the current scope.
13031 pm_node_unreference(parser, value);
13032 pm_node_destroy(parser, value);
13033 }
13035 default:
13036 // In this case we have a node that we don't know how to convert into a
13037 // target. We need to treat it as an error. For now, we'll mark it as an
13038 // error and just skip right past it.
13039 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13040 return target;
13041 }
13042}
13043
13050static pm_node_t *
13051parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13052 switch (PM_NODE_TYPE(target)) {
13053 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13054 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13055 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13056 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13057 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13058 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13059 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13060 default: break;
13061 }
13062
13063 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13064 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13065
13066 pm_node_destroy(parser, target);
13067 return UP(result);
13068}
13069
13080static pm_node_t *
13081parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13082 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13083
13084 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13085 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13086
13087 while (accept1(parser, PM_TOKEN_COMMA)) {
13088 if (accept1(parser, PM_TOKEN_USTAR)) {
13089 // Here we have a splat operator. It can have a name or be
13090 // anonymous. It can be the final target or be in the middle if
13091 // there haven't been any others yet.
13092 if (has_rest) {
13093 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13094 }
13095
13096 pm_token_t star_operator = parser->previous;
13097 pm_node_t *name = NULL;
13098
13099 if (token_begins_expression_p(parser->current.type)) {
13100 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13101 name = parse_target(parser, name, true, true);
13102 }
13103
13104 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13105 pm_multi_target_node_targets_append(parser, result, splat);
13106 has_rest = true;
13107 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13108 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13109 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13110 target = parse_target(parser, target, true, false);
13111
13112 pm_multi_target_node_targets_append(parser, result, target);
13113 context_pop(parser);
13114 } else if (token_begins_expression_p(parser->current.type)) {
13115 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13116 target = parse_target(parser, target, true, false);
13117
13118 pm_multi_target_node_targets_append(parser, result, target);
13119 } else if (!match1(parser, PM_TOKEN_EOF)) {
13120 // If we get here, then we have a trailing , in a multi target node.
13121 // We'll add an implicit rest node to represent this.
13122 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13123 pm_multi_target_node_targets_append(parser, result, rest);
13124 break;
13125 }
13126 }
13127
13128 return UP(result);
13129}
13130
13135static pm_node_t *
13136parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13137 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13138 accept1(parser, PM_TOKEN_NEWLINE);
13139
13140 // Ensure that we have either an = or a ) after the targets.
13141 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13142 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13143 }
13144
13145 return result;
13146}
13147
13151static pm_statements_node_t *
13152parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13153 // First, skip past any optional terminators that might be at the beginning
13154 // of the statements.
13155 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13156
13157 // If we have a terminator, then we can just return NULL.
13158 if (context_terminator(context, &parser->current)) return NULL;
13159
13160 pm_statements_node_t *statements = pm_statements_node_create(parser);
13161
13162 // At this point we know we have at least one statement, and that it
13163 // immediately follows the current token.
13164 context_push(parser, context);
13165
13166 while (true) {
13167 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13168 pm_statements_node_body_append(parser, statements, node, true);
13169
13170 // If we're recovering from a syntax error, then we need to stop parsing
13171 // the statements now.
13172 if (parser->recovering) {
13173 // If this is the level of context where the recovery has happened,
13174 // then we can mark the parser as done recovering.
13175 if (context_terminator(context, &parser->current)) parser->recovering = false;
13176 break;
13177 }
13178
13179 // If we have a terminator, then we will parse all consecutive
13180 // terminators and then continue parsing the statements list.
13181 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13182 // If we have a terminator, then we will continue parsing the
13183 // statements list.
13184 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13185 if (context_terminator(context, &parser->current)) break;
13186
13187 // Now we can continue parsing the list of statements.
13188 continue;
13189 }
13190
13191 // At this point we have a list of statements that are not terminated by
13192 // a newline or semicolon. At this point we need to check if we're at
13193 // the end of the statements list. If we are, then we should break out
13194 // of the loop.
13195 if (context_terminator(context, &parser->current)) break;
13196
13197 // At this point, we have a syntax error, because the statement was not
13198 // terminated by a newline or semicolon, and we're not at the end of the
13199 // statements list. Ideally we should scan forward to determine if we
13200 // should insert a missing terminator or break out of parsing the
13201 // statements list at this point.
13202 //
13203 // We don't have that yet, so instead we'll do a more naive approach. If
13204 // we were unable to parse an expression, then we will skip past this
13205 // token and continue parsing the statements list. Otherwise we'll add
13206 // an error and continue parsing the statements list.
13207 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13208 parser_lex(parser);
13209
13210 // If we are at the end of the file, then we need to stop parsing
13211 // the statements entirely at this point. Mark the parser as
13212 // recovering, as we know that EOF closes the top-level context, and
13213 // then break out of the loop.
13214 if (match1(parser, PM_TOKEN_EOF)) {
13215 parser->recovering = true;
13216 break;
13217 }
13218
13219 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13220 if (context_terminator(context, &parser->current)) break;
13221 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13222 // This is an inlined version of accept1 because the error that we
13223 // want to add has varargs. If this happens again, we should
13224 // probably extract a helper function.
13225 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13226 parser->previous.start = parser->previous.end;
13227 parser->previous.type = PM_TOKEN_MISSING;
13228 }
13229 }
13230
13231 context_pop(parser);
13232 bool last_value = true;
13233 switch (context) {
13236 last_value = false;
13237 break;
13238 default:
13239 break;
13240 }
13241 pm_void_statements_check(parser, statements, last_value);
13242
13243 return statements;
13244}
13245
13250static void
13251pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13252 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13253
13254 if (duplicated != NULL) {
13255 pm_buffer_t buffer = { 0 };
13256 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13257
13258 pm_diagnostic_list_append_format(
13259 &parser->warning_list,
13260 duplicated->location.start,
13261 duplicated->location.end,
13262 PM_WARN_DUPLICATED_HASH_KEY,
13263 (int) pm_buffer_length(&buffer),
13264 pm_buffer_value(&buffer),
13265 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13266 );
13267
13268 pm_buffer_free(&buffer);
13269 }
13270}
13271
13276static void
13277pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13278 pm_node_t *previous;
13279
13280 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
13281 pm_diagnostic_list_append_format(
13282 &parser->warning_list,
13283 node->location.start,
13284 node->location.end,
13285 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13286 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
13287 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
13288 );
13289 }
13290}
13291
13295static bool
13296parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13297 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13298 bool contains_keyword_splat = false;
13299
13300 while (true) {
13301 pm_node_t *element;
13302
13303 switch (parser->current.type) {
13304 case PM_TOKEN_USTAR_STAR: {
13305 parser_lex(parser);
13306 pm_token_t operator = parser->previous;
13307 pm_node_t *value = NULL;
13308
13309 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13310 // If we're about to parse a nested hash that is being
13311 // pushed into this hash directly with **, then we want the
13312 // inner hash to share the static literals with the outer
13313 // hash.
13314 parser->current_hash_keys = literals;
13315 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13316 } else if (token_begins_expression_p(parser->current.type)) {
13317 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13318 } else {
13319 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13320 }
13321
13322 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13323 contains_keyword_splat = true;
13324 break;
13325 }
13326 case PM_TOKEN_LABEL: {
13327 pm_token_t label = parser->current;
13328 parser_lex(parser);
13329
13330 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13331 pm_hash_key_static_literals_add(parser, literals, key);
13332
13333 pm_token_t operator = not_provided(parser);
13334 pm_node_t *value = NULL;
13335
13336 if (token_begins_expression_p(parser->current.type)) {
13337 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13338 } else {
13339 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13340 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13341 value = UP(pm_constant_read_node_create(parser, &constant));
13342 } else {
13343 int depth = -1;
13344 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13345
13346 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13347 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13348 } else {
13349 depth = pm_parser_local_depth(parser, &identifier);
13350 }
13351
13352 if (depth == -1) {
13353 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13354 } else {
13355 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13356 }
13357 }
13358
13359 value->location.end++;
13360 value = UP(pm_implicit_node_create(parser, value));
13361 }
13362
13363 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13364 break;
13365 }
13366 default: {
13367 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13368
13369 // Hash keys that are strings are automatically frozen. We will
13370 // mark that here.
13371 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13372 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13373 }
13374
13375 pm_hash_key_static_literals_add(parser, literals, key);
13376
13377 pm_token_t operator;
13378 if (pm_symbol_node_label_p(key)) {
13379 operator = not_provided(parser);
13380 } else {
13381 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13382 operator = parser->previous;
13383 }
13384
13385 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13386 element = UP(pm_assoc_node_create(parser, key, &operator, value));
13387 break;
13388 }
13389 }
13390
13391 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13392 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
13393 } else {
13394 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
13395 }
13396
13397 // If there's no comma after the element, then we're done.
13398 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13399
13400 // If the next element starts with a label or a **, then we know we have
13401 // another element in the hash, so we'll continue parsing.
13402 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13403
13404 // Otherwise we need to check if the subsequent token begins an expression.
13405 // If it does, then we'll continue parsing.
13406 if (token_begins_expression_p(parser->current.type)) continue;
13407
13408 // Otherwise by default we will exit out of this loop.
13409 break;
13410 }
13411
13412 return contains_keyword_splat;
13413}
13414
13415static inline bool
13416argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13417 if (pm_symbol_node_label_p(argument)) {
13418 return true;
13419 }
13420
13421 switch (PM_NODE_TYPE(argument)) {
13422 case PM_CALL_NODE: {
13423 pm_call_node_t *cast = (pm_call_node_t *) argument;
13424 if (cast->opening_loc.start == NULL && cast->arguments != NULL) {
13425 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13426 return false;
13427 }
13428 if (cast->block != NULL) {
13429 return false;
13430 }
13431 }
13432 break;
13433 }
13434 default: break;
13435 }
13436 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13437}
13438
13442static inline void
13443parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13444 if (arguments->arguments == NULL) {
13445 arguments->arguments = pm_arguments_node_create(parser);
13446 }
13447
13448 pm_arguments_node_arguments_append(arguments->arguments, argument);
13449}
13450
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * @param parser The parser whose current token is the first candidate argument.
 * @param arguments Receives the parsed arguments: positional/keyword arguments
 *     are appended through parse_arguments_append(), the first block argument
 *     is stored in `arguments->block`, and `has_forwarding` is set when a
 *     forwarding `...` is parsed.
 * @param accepts_forwarding When true, a leading `...` that is not followed by
 *     an expression is treated as argument forwarding.
 * @param terminator The token type that closes the list. PM_TOKEN_EOF means
 *     there is no specific closing token, in which case newlines are not
 *     skipped between arguments.
 * @param depth Passed on, incremented by one, to every nested parse call
 *     (presumably the recursion depth used for stack-depth limiting).
 */
13454static void
13455parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
 // Left binding power of the current token, used below to detect tokens that
 // bind more loosely than a range and therefore cannot begin an argument.
13456 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13457
13458 // First we need to check if the next token is one that could be the start
13459 // of an argument. If it's not, then we can just return.
13460 if (
13461 match2(parser, terminator, PM_TOKEN_EOF) ||
13462 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13463 context_terminator(parser->current_context->context, &parser->current)
13464 ) {
13465 return;
13466 }
13467
 // Flags describing what has been seen so far; they drive the ordering errors
 // reported inside the loop (argument after `...`, second bare hash, splat
 // after a bare hash, second block argument).
13468 bool parsed_first_argument = false;
13469 bool parsed_bare_hash = false;
13470 bool parsed_block_argument = false;
13471 bool parsed_forwarding_arguments = false;
13472
13473 while (!match1(parser, PM_TOKEN_EOF)) {
 // Reaching the top of the loop again after `...` means another argument
 // follows the forwarding ellipsis.
13474 if (parsed_forwarding_arguments) {
13475 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13476 }
13477
13478 pm_node_t *argument = NULL;
13479
13480 switch (parser->current.type) {
 // `**` or a label starts a bare keyword hash directly.
13481 case PM_TOKEN_USTAR_STAR:
13482 case PM_TOKEN_LABEL: {
13483 if (parsed_bare_hash) {
13484 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13485 }
13486
13487 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13488 argument = UP(hash);
13489
13490 pm_static_literals_t hash_keys = { 0 };
13491 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13492
13493 parse_arguments_append(parser, arguments, argument);
13494
13495 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13496 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13497 pm_node_flag_set(UP(arguments->arguments), flags);
13498
13499 pm_static_literals_free(&hash_keys);
13500 parsed_bare_hash = true;
13501
13502 break;
13503 }
 // `&expr` block argument, or a bare `&` with no expression after it.
13504 case PM_TOKEN_UAMPERSAND: {
13505 parser_lex(parser);
13506 pm_token_t operator = parser->previous;
13507 pm_node_t *expression = NULL;
13508
13509 if (token_begins_expression_p(parser->current.type)) {
13510 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13511 } else {
13512 pm_parser_scope_forwarding_block_check(parser, &operator);
13513 }
13514
13515 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
 // Only the first block argument occupies the block slot; any later
 // one is kept in the regular argument list instead of overwriting it.
13516 if (parsed_block_argument) {
13517 parse_arguments_append(parser, arguments, argument);
13518 } else {
13519 arguments->block = argument;
13520 }
13521
13522 if (match1(parser, PM_TOKEN_COMMA)) {
13523 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13524 }
13525
13526 parsed_block_argument = true;
13527 break;
13528 }
 // `*expr` splat, or a bare `*` with no operand.
13529 case PM_TOKEN_USTAR: {
13530 parser_lex(parser);
13531 pm_token_t operator = parser->previous;
13532
 // A `*` immediately followed by `)`, `,`, `;` or `]` has no operand,
 // so the splat node is created with a NULL expression.
13533 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13534 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13535 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13536 if (parsed_bare_hash) {
13537 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13538 }
13539 } else {
13540 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13541
13542 if (parsed_bare_hash) {
13543 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13544 }
13545
13546 argument = UP(pm_splat_node_create(parser, &operator, expression));
13547 }
13548
13549 parse_arguments_append(parser, arguments, argument);
13550 break;
13551 }
 // A leading `...` is either argument forwarding or a beginless range.
 // When forwarding is not accepted the token is not consumed here and the
 // default case below parses it as an ordinary expression.
13552 case PM_TOKEN_UDOT_DOT_DOT: {
13553 if (accepts_forwarding) {
13554 parser_lex(parser);
13555
13556 if (token_begins_expression_p(parser->current.type)) {
13557 // If the token begins an expression then this ... was
13558 // not actually argument forwarding but was instead a
13559 // range.
13560 pm_token_t operator = parser->previous;
13561 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13562
13563 // If we parse a range, we need to validate that we
13564 // didn't accidentally violate the nonassoc rules of the
13565 // ... operator.
13566 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13567 pm_range_node_t *range = (pm_range_node_t *) right;
13568 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13569 }
13570
13571 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13572 } else {
13573 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
 // With no closing terminator, `...` after an earlier
 // argument is reported as unbound forwarding.
13574 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13575 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13576 }
13577
13578 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13579 parse_arguments_append(parser, arguments, argument);
13580 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13581 arguments->has_forwarding = true;
13582 parsed_forwarding_arguments = true;
13583 break;
13584 }
13585 }
13586 }
 // fallthrough: intentional. Either forwarding is not accepted (argument
 // is still NULL) or a beginless range was just built (argument is set);
 // both continue into the generic handling below.
13588 default: {
 // `argument` is only non-NULL here when the `...` case above
 // already produced a range node.
13589 if (argument == NULL) {
13590 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13591 }
13592
13593 bool contains_keywords = false;
13594 bool contains_keyword_splat = false;
13595
 // If the expression turns out to be a hash key (a label-style symbol
 // or something followed by `=>`), wrap it in a bare keyword hash.
13596 if (argument_allowed_for_bare_hash(parser, argument)){
13597 if (parsed_bare_hash) {
13598 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13599 }
13600
 // The operator is the `=>` when one was just consumed; otherwise
 // it is marked as not provided.
13601 pm_token_t operator;
13602 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13603 operator = parser->previous;
13604 } else {
13605 operator = not_provided(parser);
13606 }
13607
13608 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13609 contains_keywords = true;
13610
13611 // Create the set of static literals for this hash.
13612 pm_static_literals_t hash_keys = { 0 };
13613 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13614
13615 // Finish parsing the one we are part way through.
13616 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13617 argument = UP(pm_assoc_node_create(parser, argument, &operator, value));
13618
13619 pm_keyword_hash_node_elements_append(bare_hash, argument);
13620 argument = UP(bare_hash);
13621
13622 // Then parse more if we have a comma
13623 if (accept1(parser, PM_TOKEN_COMMA) && (
13624 token_begins_expression_p(parser->current.type) ||
13625 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13626 )) {
13627 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13628 }
13629
13630 pm_static_literals_free(&hash_keys);
13631 parsed_bare_hash = true;
13632 }
13633
13634 parse_arguments_append(parser, arguments, argument);
13635
 // Record on the arguments node whether keywords / a keyword splat
 // were seen; `flags` stays 0 for a plain positional argument.
13636 pm_node_flags_t flags = 0;
13637 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13638 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13639 pm_node_flag_set(UP(arguments->arguments), flags);
13640
13641 break;
13642 }
13643 }
13644
13645 parsed_first_argument = true;
13646
13647 // If parsing the argument failed, we need to stop parsing arguments.
13648 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13649
13650 // If the terminator of these arguments is not EOF, then we have a
13651 // specific token we're looking for. In that case we can accept a
13652 // newline here because it is not functioning as a statement terminator.
13653 bool accepted_newline = false;
13654 if (terminator != PM_TOKEN_EOF) {
13655 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13656 }
13657
13658 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13659 // If we previously were on a comma and we just parsed a bare hash,
13660 // then we want to continue parsing arguments. This is because the
13661 // comma was grabbed up by the hash parser.
13662 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13663 // If there was a comma, then we need to check if we also accepted a
13664 // newline. If we did, then this is a syntax error.
13665 if (accepted_newline) {
13666 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13667 }
13668
13669 // If this is a command call and an argument takes a block,
13670 // there can be no further arguments. For example,
13671 // `foo(bar 1 do end, 2)` should be rejected.
13672 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13673 pm_call_node_t *call = (pm_call_node_t *) argument;
13674 if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
13675 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13676 break;
13677 }
13678 }
13679 } else {
13680 // If there is no comma at the end of the argument list then we're
13681 // done parsing arguments and can break out of this loop.
13682 break;
13683 }
13684
13685 // If we hit the terminator, then that means we have a trailing comma so
13686 // we can accept that output as well.
13687 if (match1(parser, terminator)) break;
13688 }
13689}
13690
13702parse_required_destructured_parameter(pm_parser_t *parser) {
13703 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13704
13705 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13706 pm_multi_target_node_opening_set(node, &parser->previous);
13707
13708 do {
13709 pm_node_t *param;
13710
13711 // If we get here then we have a trailing comma, which isn't allowed in
13712 // the grammar. In other places, multi targets _do_ allow trailing
13713 // commas, so here we'll assume this is a mistake of the user not
13714 // knowing it's not allowed here.
13715 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13716 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13717 pm_multi_target_node_targets_append(parser, node, param);
13718 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13719 break;
13720 }
13721
13722 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13723 param = UP(parse_required_destructured_parameter(parser));
13724 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13725 pm_token_t star = parser->previous;
13726 pm_node_t *value = NULL;
13727
13728 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13729 pm_token_t name = parser->previous;
13730 value = UP(pm_required_parameter_node_create(parser, &name));
13731 if (pm_parser_parameter_name_check(parser, &name)) {
13732 pm_node_flag_set_repeated_parameter(value);
13733 }
13734 pm_parser_local_add_token(parser, &name, 1);
13735 }
13736
13737 param = UP(pm_splat_node_create(parser, &star, value));
13738 } else {
13739 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13740 pm_token_t name = parser->previous;
13741
13742 param = UP(pm_required_parameter_node_create(parser, &name));
13743 if (pm_parser_parameter_name_check(parser, &name)) {
13744 pm_node_flag_set_repeated_parameter(param);
13745 }
13746 pm_parser_local_add_token(parser, &name, 1);
13747 }
13748
13749 pm_multi_target_node_targets_append(parser, node, param);
13750 } while (accept1(parser, PM_TOKEN_COMMA));
13751
13752 accept1(parser, PM_TOKEN_NEWLINE);
13753 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13754 pm_multi_target_node_closing_set(node, &parser->previous);
13755
13756 return node;
13757}
13758
/**
 * States used to validate the order in which parameters appear in a parameter
 * list. The numeric values matter: update_parameter_state() in this file
 * rejects a parameter whose state is numerically greater than the current
 * state, and otherwise lowers the current state to the new one. Kinds of
 * parameters that must appear later in a list therefore have smaller values.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** Once this state is reached, any further state-changing token is an ordering error. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** State for keyword rest (`**`) parameters. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** State for keyword (label) parameters. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** State between keywords and after-optional (presumably the rest parameter position; verify against users). */
    PM_PARAMETERS_ORDER_REST = 4,

    /** State entered when a named parameter follows an optional one, and the state for `*`. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** State for optional parameters (triggered by `=`). */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** State for named (required) parameters, including destructured ones. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** Initial state, before any parameter has been seen. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13774
13778static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13779 [0] = PM_PARAMETERS_NO_CHANGE,
13780 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13781 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13782 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13783 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13784 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13785 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13786 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13787 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13788 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13789 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13790 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13791};
13792
13800static bool
13801update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13802 pm_parameters_order_t state = parameters_ordering[token->type];
13803 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13804
13805 // If we see another ordered argument after a optional argument
13806 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13807 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13808 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13809 return true;
13810 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13811 return true;
13812 }
13813
13814 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13815 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13816 return false;
13817 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13818 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13819 return false;
13820 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13821 // We know what transition we failed on, so we can provide a better error here.
13822 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13823 return false;
13824 }
13825
13826 if (state < *current) *current = state;
13827 return true;
13828}
13829
/**
 * Parse a list of parameters (for a method, block, or lambda) into a
 * parameters node.
 *
 * @param parser The parser positioned at the first parameter.
 * @param binding_power The binding power handed to parse_value_expression()
 *     when parsing default values.
 * @param uses_parentheses When true, a newline is accepted after each
 *     parameter, and a label directly followed by a newline or semicolon stops
 *     parameter parsing instead of producing a required keyword parameter.
 * @param allows_trailing_comma When true, a trailing comma after named
 *     parameters produces an implicit rest node instead of an error.
 * @param allows_forwarding_parameters When false, `...` is reported as an
 *     error (it is still parsed).
 * @param accepts_blocks_in_defaults When true, `true` is pushed onto the
 *     accepts-block stack while a default value is parsed.
 * @param in_block Together with `uses_parentheses`, decides whether
 *     parser->in_keyword_arg is set while a label parameter is parsed.
 * @param depth Passed on, incremented by one, to nested parse calls
 *     (presumably the recursion depth used for stack-depth limiting).
 * @returns The parameters node, or NULL when its location is empty (no
 *     parameters were parsed), in which case the node is destroyed.
 */
13833static pm_parameters_node_t *
13834parse_parameters(
13835 pm_parser_t *parser,
13836 pm_binding_power_t binding_power,
13837 bool uses_parentheses,
13838 bool allows_trailing_comma,
13839 bool allows_forwarding_parameters,
13840 bool accepts_blocks_in_defaults,
13841 bool in_block,
13842 uint16_t depth
13843) {
 // Balanced by the pm_do_loop_stack_pop() call at the end of this function.
13844 pm_do_loop_stack_push(parser, false);
13845
13846 pm_parameters_node_t *params = pm_parameters_node_create(parser);
 // Ordering state; starts at NONE and is lowered by update_parameter_state()
 // as parameters are seen.
13847 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13848
13849 while (true) {
13850 bool parsing = true;
13851
13852 switch (parser->current.type) {
 // Destructured required parameter, e.g. `(a, b)`.
13853 case PM_TOKEN_PARENTHESIS_LEFT: {
13854 update_parameter_state(parser, &parser->current, &order);
13855 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13856
 // While the state is still above AFTER_OPTIONAL the parameter is a
 // leading required one; otherwise it is stored with the posts.
13857 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13858 pm_parameters_node_requireds_append(params, param);
13859 } else {
13860 pm_parameters_node_posts_append(params, param);
13861 }
13862 break;
13863 }
 // Block parameter: `&name` or a bare `&`.
13864 case PM_TOKEN_UAMPERSAND:
13865 case PM_TOKEN_AMPERSAND: {
13866 update_parameter_state(parser, &parser->current, &order);
13867 parser_lex(parser);
13868
13869 pm_token_t operator = parser->previous;
13870 pm_token_t name;
13871
13872 bool repeated = false;
13873 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13874 name = parser->previous;
13875 repeated = pm_parser_parameter_name_check(parser, &name);
13876 pm_parser_local_add_token(parser, &name, 1);
13877 } else {
 // No name given: record on the scope that it has an anonymous
 // block parameter.
13878 name = not_provided(parser);
13879 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13880 }
13881
13882 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
13883 if (repeated) {
13884 pm_node_flag_set_repeated_parameter(UP(param));
13885 }
 // A second block parameter is an error; it is kept in posts so the
 // node is not lost.
13886 if (params->block == NULL) {
13887 pm_parameters_node_block_set(params, param);
13888 } else {
13889 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
13890 pm_parameters_node_posts_append(params, UP(param));
13891 }
13892
13893 break;
13894 }
 // Forwarding parameter `...`.
13895 case PM_TOKEN_UDOT_DOT_DOT: {
13896 if (!allows_forwarding_parameters) {
13897 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
13898 }
13899
 // Remember whether the ordering check passed so that a second error
 // is only added below when none was reported here.
13900 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13901 parser_lex(parser);
13902
13903 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13904 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13905
13906 if (params->keyword_rest != NULL) {
13907 // If we already have a keyword rest parameter, then we replace it with the
13908 // forwarding parameter and move the keyword rest parameter to the posts list.
13909 pm_node_t *keyword_rest = params->keyword_rest;
13910 pm_parameters_node_posts_append(params, keyword_rest);
13911 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13912 params->keyword_rest = NULL;
13913 }
13914
13915 pm_parameters_node_keyword_rest_set(params, UP(param));
13916 break;
13917 }
 // Positional parameter (required or optional). Only identifiers are
 // valid names; the other token types listed here get an error in the
 // inner switch and are then handled like a name.
13918 case PM_TOKEN_CLASS_VARIABLE:
13919 case PM_TOKEN_IDENTIFIER:
13920 case PM_TOKEN_CONSTANT:
13921 case PM_TOKEN_INSTANCE_VARIABLE:
13922 case PM_TOKEN_GLOBAL_VARIABLE:
13923 case PM_TOKEN_METHOD_NAME: {
13924 parser_lex(parser);
13925 switch (parser->previous.type) {
13926 case PM_TOKEN_CONSTANT:
13927 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13928 break;
13929 case PM_TOKEN_INSTANCE_VARIABLE:
13930 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13931 break;
13932 case PM_TOKEN_GLOBAL_VARIABLE:
13933 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13934 break;
13935 case PM_TOKEN_CLASS_VARIABLE:
13936 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13937 break;
13938 case PM_TOKEN_METHOD_NAME:
13939 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13940 break;
13941 default: break;
13942 }
13943
 // When `=` follows the name, the ordering transition is driven by the
 // `=` token (optional parameter) rather than by the name token.
13944 if (parser->current.type == PM_TOKEN_EQUAL) {
13945 update_parameter_state(parser, &parser->current, &order);
13946 } else {
13947 update_parameter_state(parser, &parser->previous, &order);
13948 }
13949
13950 pm_token_t name = parser->previous;
13951 bool repeated = pm_parser_parameter_name_check(parser, &name);
13952 pm_parser_local_add_token(parser, &name, 1);
13953
13954 if (match1(parser, PM_TOKEN_EQUAL)) {
13955 pm_token_t operator = parser->current;
13956 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13957 parser_lex(parser);
13958
 // For versions up to CRuby 3.3, snapshot how often the parameter has
 // been read so a read inside its own default value can be detected.
13959 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
13960 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
13961
13962 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
13963 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
13964 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
13965
13966 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
13967
13968 if (repeated) {
13969 pm_node_flag_set_repeated_parameter(UP(param));
13970 }
13971 pm_parameters_node_optionals_append(params, param);
13972
13973 // If the value of the parameter increased the number of
13974 // reads of that parameter, then we need to warn that we
13975 // have a circular definition.
13976 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
13977 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
13978 }
13979
13980 context_pop(parser);
13981
13982 // If parsing the value of the parameter resulted in error recovery,
13983 // then we can put a missing node in its place and stop parsing the
13984 // parameters entirely now.
13985 if (parser->recovering) {
13986 parsing = false;
13987 break;
13988 }
13989 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
 // No default value and no optional/rest seen yet: leading required.
13990 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13991 if (repeated) {
13992 pm_node_flag_set_repeated_parameter(UP(param));
13993 }
13994 pm_parameters_node_requireds_append(params, UP(param));
13995 } else {
 // Required parameter appearing after optionals/rest: stored in posts.
13996 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13997 if (repeated) {
13998 pm_node_flag_set_repeated_parameter(UP(param));
13999 }
14000 pm_parameters_node_posts_append(params, UP(param));
14001 }
14002
14003 break;
14004 }
 // Keyword parameter introduced by a label, with or without a default.
14005 case PM_TOKEN_LABEL: {
 // in_keyword_arg is reset to false at the end of this case.
14006 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14007 update_parameter_state(parser, &parser->current, &order);
14008
14009 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14010 parser_lex(parser);
14011
 // `local` is the label token without its final byte (presumably the
 // trailing `:`); it is the name used for the local variable.
14012 pm_token_t name = parser->previous;
14013 pm_token_t local = name;
14014 local.end -= 1;
14015
14016 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14017 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14018 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14019 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14020 }
14021
14022 bool repeated = pm_parser_parameter_name_check(parser, &local);
14023 pm_parser_local_add_token(parser, &local, 1);
14024
 // The token after the label decides whether this is a required keyword
 // (list continues or ends) or an optional keyword with a default.
14025 switch (parser->current.type) {
14026 case PM_TOKEN_COMMA:
14027 case PM_TOKEN_PARENTHESIS_RIGHT:
14028 case PM_TOKEN_PIPE: {
14029 context_pop(parser);
14030
14031 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14032 if (repeated) {
14033 pm_node_flag_set_repeated_parameter(param);
14034 }
14035
14036 pm_parameters_node_keywords_append(params, param);
14037 break;
14038 }
14039 case PM_TOKEN_SEMICOLON:
14040 case PM_TOKEN_NEWLINE: {
14041 context_pop(parser);
14042
 // Inside parentheses parameter parsing stops here without
 // creating a node for this label.
14043 if (uses_parentheses) {
14044 parsing = false;
14045 break;
14046 }
14047
14048 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14049 if (repeated) {
14050 pm_node_flag_set_repeated_parameter(param);
14051 }
14052
14053 pm_parameters_node_keywords_append(params, param);
14054 break;
14055 }
14056 default: {
14057 pm_node_t *param;
14058
14059 if (token_begins_expression_p(parser->current.type)) {
 // Same circular-default detection as for positional optionals.
14060 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14061 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14062
14063 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14064 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14065 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14066
14067 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14068 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14069 }
14070
14071 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14072 }
14073 else {
14074 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14075 }
14076
14077 if (repeated) {
14078 pm_node_flag_set_repeated_parameter(param);
14079 }
14080
14081 context_pop(parser);
14082 pm_parameters_node_keywords_append(params, param);
14083
14084 // If parsing the value of the parameter resulted in error recovery,
14085 // then we can put a missing node in its place and stop parsing the
14086 // parameters entirely now.
14087 if (parser->recovering) {
14088 parsing = false;
14089 break;
14090 }
14091 }
14092 }
14093
14094 parser->in_keyword_arg = false;
14095 break;
14096 }
 // Rest parameter: `*name` or a bare `*`.
14097 case PM_TOKEN_USTAR:
14098 case PM_TOKEN_STAR: {
14099 update_parameter_state(parser, &parser->current, &order);
14100 parser_lex(parser);
14101
14102 pm_token_t operator = parser->previous;
14103 pm_token_t name;
14104 bool repeated = false;
14105
14106 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14107 name = parser->previous;
14108 repeated = pm_parser_parameter_name_check(parser, &name);
14109 pm_parser_local_add_token(parser, &name, 1);
14110 } else {
 // No name given: record on the scope that it has an anonymous
 // positional rest parameter.
14111 name = not_provided(parser);
14112 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14113 }
14114
14115 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
14116 if (repeated) {
14117 pm_node_flag_set_repeated_parameter(param);
14118 }
14119
 // A second rest parameter is an error; it is kept in posts.
14120 if (params->rest == NULL) {
14121 pm_parameters_node_rest_set(params, param);
14122 } else {
14123 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14124 pm_parameters_node_posts_append(params, param);
14125 }
14126
14127 break;
14128 }
 // Keyword rest parameter: `**name`, a bare `**`, or `**nil`.
14129 case PM_TOKEN_STAR_STAR:
14130 case PM_TOKEN_USTAR_STAR: {
 // Keep the state from before this token: `**nil` is rejected when
 // the state had already reached keywords or anything later.
14131 pm_parameters_order_t previous_order = order;
14132 update_parameter_state(parser, &parser->current, &order);
14133 parser_lex(parser);
14134
14135 pm_token_t operator = parser->previous;
14136 pm_node_t *param;
14137
14138 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14139 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14140 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14141 }
14142
14143 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14144 } else {
14145 pm_token_t name;
14146
14147 bool repeated = false;
14148 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14149 name = parser->previous;
14150 repeated = pm_parser_parameter_name_check(parser, &name);
14151 pm_parser_local_add_token(parser, &name, 1);
14152 } else {
 // No name given: record on the scope that it has an anonymous
 // keyword rest parameter.
14153 name = not_provided(parser);
14154 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14155 }
14156
14157 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
14158 if (repeated) {
14159 pm_node_flag_set_repeated_parameter(param);
14160 }
14161 }
14162
 // A second keyword rest parameter is an error; it is kept in posts.
14163 if (params->keyword_rest == NULL) {
14164 pm_parameters_node_keyword_rest_set(params, param);
14165 } else {
14166 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14167 pm_parameters_node_posts_append(params, param);
14168 }
14169
14170 break;
14171 }
 // Any other token ends the parameter list. If the previous token was a
 // comma, that comma was a trailing one.
14172 default:
14173 if (parser->previous.type == PM_TOKEN_COMMA) {
14174 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14175 // If we get here, then we have a trailing comma in a
14176 // block parameter list.
14177 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14178
14179 if (params->rest == NULL) {
14180 pm_parameters_node_rest_set(params, param);
14181 } else {
14182 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14183 pm_parameters_node_posts_append(params, UP(param));
14184 }
14185 } else {
14186 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14187 }
14188 }
14189
14190 parsing = false;
14191 break;
14192 }
14193
14194 // If we hit some kind of issue while parsing the parameter, this would
14195 // have been set to false. In that case, we need to break out of the
14196 // loop.
14197 if (!parsing) break;
14198
 // Newlines between parameters are only skipped inside parentheses.
14199 bool accepted_newline = false;
14200 if (uses_parentheses) {
14201 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14202 }
14203
14204 if (accept1(parser, PM_TOKEN_COMMA)) {
14205 // If there was a comma, but we also accepted a newline, then this
14206 // is a syntax error.
14207 if (accepted_newline) {
14208 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14209 }
14210 } else {
14211 // If there was no comma, then we're done parsing parameters.
14212 break;
14213 }
14214 }
14215
14216 pm_do_loop_stack_pop(parser);
14217
14218 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14219 if (params->base.location.start == params->base.location.end) {
14220 pm_node_destroy(parser, UP(params));
14221 return NULL;
14222 }
14223
14224 return params;
14225}
14226
14231static size_t
14232token_newline_index(const pm_parser_t *parser) {
14233 if (parser->heredoc_end == NULL) {
14234 // This is the common case. In this case we can look at the previously
14235 // recorded newline in the newline list and subtract from the current
14236 // offset.
14237 return parser->newline_list.size - 1;
14238 } else {
14239 // This is unlikely. This is the case that we have already parsed the
14240 // start of a heredoc, so we cannot rely on looking at the previous
14241 // offset of the newline list, and instead must go through the whole
14242 // process of a binary search for the line number.
14243 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14244 }
14245}
14246
14251static int64_t
14252token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14253 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14254 const uint8_t *end = token->start;
14255
14256 // Skip over the BOM if it is present.
14257 if (
14258 newline_index == 0 &&
14259 parser->start[0] == 0xef &&
14260 parser->start[1] == 0xbb &&
14261 parser->start[2] == 0xbf
14262 ) cursor += 3;
14263
14264 int64_t column = 0;
14265 for (; cursor < end; cursor++) {
14266 switch (*cursor) {
14267 case '\t':
14268 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14269 break;
14270 case ' ':
14271 column++;
14272 break;
14273 default:
14274 column++;
14275 if (break_on_non_space) return -1;
14276 break;
14277 }
14278 }
14279
14280 return column;
14281}
14282
14287static void
14288parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14289 // If these warnings are disabled (unlikely), then we can just return.
14290 if (!parser->warn_mismatched_indentation) return;
14291
14292 // If the tokens are on the same line, we do not warn.
14293 size_t closing_newline_index = token_newline_index(parser);
14294 if (opening_newline_index == closing_newline_index) return;
14295
14296 // If the opening token has anything other than spaces or tabs before it,
14297 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14298 // and the `if` immediately follows an `else` keyword.
14299 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14300 if (!if_after_else && (opening_column == -1)) return;
14301
14302 // Get a reference to the closing token off the current parser. This assumes
14303 // that the caller has placed this in the correct position.
14304 pm_token_t *closing_token = &parser->current;
14305
14306 // If the tokens are at the same indentation, we do not warn.
14307 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14308 if ((closing_column == -1) || (opening_column == closing_column)) return;
14309
14310 // If the closing column is greater than the opening column and we are
14311 // allowing indentation, then we do not warn.
14312 if (allow_indent && (closing_column > opening_column)) return;
14313
14314 // Otherwise, add a warning.
14315 PM_PARSER_WARN_FORMAT(
14316 parser,
14317 closing_token->start,
14318 closing_token->end,
14319 PM_WARN_INDENTATION_MISMATCH,
14320 (int) (closing_token->end - closing_token->start),
14321 (const char *) closing_token->start,
14322 (int) (opening_token->end - opening_token->start),
14323 (const char *) opening_token->start,
14324 ((int32_t) opening_newline_index) + parser->start_line
14325 );
14326}
14327
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues maps each value onto the matching PM_CONTEXT_*_RESCUE,
 * PM_CONTEXT_*_ELSE, and PM_CONTEXT_*_ENSURE contexts.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14337
14342static inline void
14343parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14344 pm_rescue_node_t *current = NULL;
14345
14346 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14347 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14348 parser_lex(parser);
14349
14350 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14351
14352 switch (parser->current.type) {
14353 case PM_TOKEN_EQUAL_GREATER: {
14354 // Here we have an immediate => after the rescue keyword, in which case
14355 // we're going to have an empty list of exceptions to rescue (which
14356 // implies StandardError).
14357 parser_lex(parser);
14358 pm_rescue_node_operator_set(rescue, &parser->previous);
14359
14360 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14361 reference = parse_target(parser, reference, false, false);
14362
14363 pm_rescue_node_reference_set(rescue, reference);
14364 break;
14365 }
14366 case PM_TOKEN_NEWLINE:
14367 case PM_TOKEN_SEMICOLON:
14368 case PM_TOKEN_KEYWORD_THEN:
14369 // Here we have a terminator for the rescue keyword, in which
14370 // case we're going to just continue on.
14371 break;
14372 default: {
14373 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14374 // Here we have something that could be an exception expression, so
14375 // we'll attempt to parse it here and any others delimited by commas.
14376
14377 do {
14378 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14379 pm_rescue_node_exceptions_append(rescue, expression);
14380
14381 // If we hit a newline, then this is the end of the rescue expression. We
14382 // can continue on to parse the statements.
14383 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14384
14385 // If we hit a `=>` then we're going to parse the exception variable. Once
14386 // we've done that, we'll break out of the loop and parse the statements.
14387 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14388 pm_rescue_node_operator_set(rescue, &parser->previous);
14389
14390 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14391 reference = parse_target(parser, reference, false, false);
14392
14393 pm_rescue_node_reference_set(rescue, reference);
14394 break;
14395 }
14396 } while (accept1(parser, PM_TOKEN_COMMA));
14397 }
14398 }
14399 }
14400
14401 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14402 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14403 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14404 }
14405 } else {
14406 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14407 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
14408 }
14409
14410 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14411 pm_accepts_block_stack_push(parser, true);
14412 pm_context_t context;
14413
14414 switch (type) {
14415 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14416 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14417 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14418 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14419 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14420 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14421 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14422 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14423 }
14424
14425 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14426 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14427
14428 pm_accepts_block_stack_pop(parser);
14429 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14430 }
14431
14432 if (current == NULL) {
14433 pm_begin_node_rescue_clause_set(parent_node, rescue);
14434 } else {
14435 pm_rescue_node_subsequent_set(current, rescue);
14436 }
14437
14438 current = rescue;
14439 }
14440
14441 // The end node locations on rescue nodes will not be set correctly
14442 // since we won't know the end until we've found all subsequent
14443 // clauses. This sets the end location on all rescues once we know it.
14444 if (current != NULL) {
14445 const uint8_t *end_to_set = current->base.location.end;
14446 pm_rescue_node_t *clause = parent_node->rescue_clause;
14447
14448 while (clause != NULL) {
14449 clause->base.location.end = end_to_set;
14450 clause = clause->subsequent;
14451 }
14452 }
14453
14454 pm_token_t else_keyword;
14455 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14456 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14457 opening_newline_index = token_newline_index(parser);
14458
14459 else_keyword = parser->current;
14460 opening = &else_keyword;
14461
14462 parser_lex(parser);
14463 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14464
14465 pm_statements_node_t *else_statements = NULL;
14466 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14467 pm_accepts_block_stack_push(parser, true);
14468 pm_context_t context;
14469
14470 switch (type) {
14471 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14472 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14473 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14474 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14475 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14476 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14477 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14478 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14479 }
14480
14481 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14482 pm_accepts_block_stack_pop(parser);
14483
14484 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14485 }
14486
14487 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14488 pm_begin_node_else_clause_set(parent_node, else_clause);
14489
14490 // If we don't have a `current` rescue node, then this is a dangling
14491 // else, and it's an error.
14492 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14493 }
14494
14495 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14496 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14497 pm_token_t ensure_keyword = parser->current;
14498
14499 parser_lex(parser);
14500 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14501
14502 pm_statements_node_t *ensure_statements = NULL;
14503 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14504 pm_accepts_block_stack_push(parser, true);
14505 pm_context_t context;
14506
14507 switch (type) {
14508 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14509 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14510 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14511 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14512 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14513 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14514 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14515 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14516 }
14517
14518 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14519 pm_accepts_block_stack_pop(parser);
14520
14521 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14522 }
14523
14524 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14525 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14526 }
14527
14528 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14529 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14530 pm_begin_node_end_keyword_set(parent_node, &parser->current);
14531 } else {
14532 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
14533 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
14534 }
14535}
14536
14541static pm_begin_node_t *
14542parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14543 pm_token_t begin_keyword = not_provided(parser);
14544 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
14545
14546 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14547 node->base.location.start = start;
14548
14549 return node;
14550}
14551
14556parse_block_parameters(
14557 pm_parser_t *parser,
14558 bool allows_trailing_comma,
14559 const pm_token_t *opening,
14560 bool is_lambda_literal,
14561 bool accepts_blocks_in_defaults,
14562 uint16_t depth
14563) {
14564 pm_parameters_node_t *parameters = NULL;
14565 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14566 if (!is_lambda_literal) {
14567 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14568 }
14569 parameters = parse_parameters(
14570 parser,
14571 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14572 false,
14573 allows_trailing_comma,
14574 false,
14575 accepts_blocks_in_defaults,
14576 true,
14577 (uint16_t) (depth + 1)
14578 );
14579 if (!is_lambda_literal) {
14580 context_pop(parser);
14581 }
14582 }
14583
14584 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14585 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
14586 accept1(parser, PM_TOKEN_NEWLINE);
14587
14588 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14589 do {
14590 switch (parser->current.type) {
14591 case PM_TOKEN_CONSTANT:
14592 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14593 parser_lex(parser);
14594 break;
14595 case PM_TOKEN_INSTANCE_VARIABLE:
14596 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14597 parser_lex(parser);
14598 break;
14599 case PM_TOKEN_GLOBAL_VARIABLE:
14600 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14601 parser_lex(parser);
14602 break;
14603 case PM_TOKEN_CLASS_VARIABLE:
14604 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14605 parser_lex(parser);
14606 break;
14607 default:
14608 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14609 break;
14610 }
14611
14612 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14613 pm_parser_local_add_token(parser, &parser->previous, 1);
14614
14615 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14616 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14617
14618 pm_block_parameters_node_append_local(block_parameters, local);
14619 } while (accept1(parser, PM_TOKEN_COMMA));
14620 }
14621 }
14622
14623 return block_parameters;
14624}
14625
14630static bool
14631outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14632 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14633 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14634 }
14635
14636 return false;
14637}
14638
/**
 * The names of the numbered parameters `_1` through `_9`; the entry for
 * parameter number N lives at index N - 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    [0] = "_1",
    [1] = "_2",
    [2] = "_3",
    [3] = "_4",
    [4] = "_5",
    [5] = "_6",
    [6] = "_7",
    [7] = "_8",
    [8] = "_9"
};
14647
14653static pm_node_t *
14654parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14655 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14656
14657 // If we have ordinary parameters, then we will return them as the set of
14658 // parameters.
14659 if (parameters != NULL) {
14660 // If we also have implicit parameters, then this is an error.
14661 if (implicit_parameters->size > 0) {
14662 pm_node_t *node = implicit_parameters->nodes[0];
14663
14664 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14665 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14666 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14667 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14668 } else {
14669 assert(false && "unreachable");
14670 }
14671 }
14672
14673 return parameters;
14674 }
14675
14676 // If we don't have any implicit parameters, then the set of parameters is
14677 // NULL.
14678 if (implicit_parameters->size == 0) {
14679 return NULL;
14680 }
14681
14682 // If we don't have ordinary parameters, then we now must validate our set
14683 // of implicit parameters. We can only have numbered parameters or it, but
14684 // they cannot be mixed.
14685 uint8_t numbered_parameter = 0;
14686 bool it_parameter = false;
14687
14688 for (size_t index = 0; index < implicit_parameters->size; index++) {
14689 pm_node_t *node = implicit_parameters->nodes[index];
14690
14691 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14692 if (it_parameter) {
14693 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14694 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14695 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14696 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14697 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14698 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
14699 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
14700 } else {
14701 assert(false && "unreachable");
14702 }
14703 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14704 if (numbered_parameter > 0) {
14705 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14706 } else {
14707 it_parameter = true;
14708 }
14709 }
14710 }
14711
14712 if (numbered_parameter > 0) {
14713 // Go through the parent scopes and mark them as being disallowed from
14714 // using numbered parameters because this inner scope is using them.
14715 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14716 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14717 }
14718
14719 const pm_location_t location = { .start = opening->start, .end = closing->end };
14720 return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
14721 }
14722
14723 if (it_parameter) {
14724 return UP(pm_it_parameters_node_create(parser, opening, closing));
14725 }
14726
14727 return NULL;
14728}
14729
14733static pm_block_node_t *
14734parse_block(pm_parser_t *parser, uint16_t depth) {
14735 pm_token_t opening = parser->previous;
14736 accept1(parser, PM_TOKEN_NEWLINE);
14737
14738 pm_accepts_block_stack_push(parser, true);
14739 pm_parser_scope_push(parser, false);
14740
14741 pm_block_parameters_node_t *block_parameters = NULL;
14742
14743 if (accept1(parser, PM_TOKEN_PIPE)) {
14744 pm_token_t block_parameters_opening = parser->previous;
14745 if (match1(parser, PM_TOKEN_PIPE)) {
14746 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14747 parser->command_start = true;
14748 parser_lex(parser);
14749 } else {
14750 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14751 accept1(parser, PM_TOKEN_NEWLINE);
14752 parser->command_start = true;
14753 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14754 }
14755
14756 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
14757 }
14758
14759 accept1(parser, PM_TOKEN_NEWLINE);
14760 pm_node_t *statements = NULL;
14761
14762 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14763 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14764 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14765 }
14766
14767 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
14768 } else {
14769 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14770 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14771 pm_accepts_block_stack_push(parser, true);
14772 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14773 pm_accepts_block_stack_pop(parser);
14774 }
14775
14776 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14777 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14778 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14779 }
14780 }
14781
14782 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
14783 }
14784
14785 pm_constant_id_list_t locals;
14786 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14787 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14788
14789 pm_parser_scope_pop(parser);
14790 pm_accepts_block_stack_pop(parser);
14791
14792 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14793}
14794
14800static bool
14801parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14802 bool found = false;
14803
14804 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14805 found |= true;
14806 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14807
14808 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14809 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14810 } else {
14811 pm_accepts_block_stack_push(parser, true);
14812 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14813
14814 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14815 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14816 parser->previous.start = parser->previous.end;
14817 parser->previous.type = PM_TOKEN_MISSING;
14818 }
14819
14820 pm_accepts_block_stack_pop(parser);
14821 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14822 }
14823 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14824 found |= true;
14825 pm_accepts_block_stack_push(parser, false);
14826
14827 // If we get here, then the subsequent token cannot be used as an infix
14828 // operator. In this case we assume the subsequent token is part of an
14829 // argument to this method call.
14830 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14831
14832 // If we have done with the arguments and still not consumed the comma,
14833 // then we have a trailing comma where we need to check whether it is
14834 // allowed or not.
14835 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14836 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14837 }
14838
14839 pm_accepts_block_stack_pop(parser);
14840 }
14841
14842 // If we're at the end of the arguments, we can now check if there is a block
14843 // node that starts with a {. If there is, then we can parse it and add it to
14844 // the arguments.
14845 if (accepts_block) {
14846 pm_block_node_t *block = NULL;
14847
14848 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14849 found |= true;
14850 block = parse_block(parser, (uint16_t) (depth + 1));
14851 pm_arguments_validate_block(parser, arguments, block);
14852 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14853 found |= true;
14854 block = parse_block(parser, (uint16_t) (depth + 1));
14855 }
14856
14857 if (block != NULL) {
14858 if (arguments->block == NULL && !arguments->has_forwarding) {
14859 arguments->block = UP(block);
14860 } else {
14861 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14862
14863 if (arguments->block != NULL) {
14864 if (arguments->arguments == NULL) {
14865 arguments->arguments = pm_arguments_node_create(parser);
14866 }
14867 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
14868 }
14869 arguments->block = UP(block);
14870 }
14871 }
14872 }
14873
14874 return found;
14875}
14876
14881static void
14882parse_return(pm_parser_t *parser, pm_node_t *node) {
14883 bool in_sclass = false;
14884 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14885 switch (context_node->context) {
14889 case PM_CONTEXT_BEGIN:
14890 case PM_CONTEXT_CASE_IN:
14893 case PM_CONTEXT_DEFINED:
14894 case PM_CONTEXT_ELSE:
14895 case PM_CONTEXT_ELSIF:
14896 case PM_CONTEXT_EMBEXPR:
14898 case PM_CONTEXT_FOR:
14899 case PM_CONTEXT_IF:
14901 case PM_CONTEXT_MAIN:
14903 case PM_CONTEXT_PARENS:
14904 case PM_CONTEXT_POSTEXE:
14906 case PM_CONTEXT_PREEXE:
14908 case PM_CONTEXT_TERNARY:
14909 case PM_CONTEXT_UNLESS:
14910 case PM_CONTEXT_UNTIL:
14911 case PM_CONTEXT_WHILE:
14912 // Keep iterating up the lists of contexts, because returns can
14913 // see through these.
14914 continue;
14918 case PM_CONTEXT_SCLASS:
14919 in_sclass = true;
14920 continue;
14924 case PM_CONTEXT_CLASS:
14928 case PM_CONTEXT_MODULE:
14929 // These contexts are invalid for a return.
14930 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14931 return;
14942 case PM_CONTEXT_DEF:
14948 // These contexts are valid for a return, and we should not
14949 // continue to loop.
14950 return;
14951 case PM_CONTEXT_NONE:
14952 // This case should never happen.
14953 assert(false && "unreachable");
14954 break;
14955 }
14956 }
14957 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14958 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14959 }
14960}
14961
14966static void
14967parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
14968 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14969 switch (context_node->context) {
14976 case PM_CONTEXT_DEFINED:
14977 case PM_CONTEXT_FOR:
14984 case PM_CONTEXT_POSTEXE:
14985 case PM_CONTEXT_UNTIL:
14986 case PM_CONTEXT_WHILE:
14987 // These are the good cases. We're allowed to have a block exit
14988 // in these contexts.
14989 return;
14990 case PM_CONTEXT_DEF:
14995 case PM_CONTEXT_MAIN:
14996 case PM_CONTEXT_PREEXE:
14997 case PM_CONTEXT_SCLASS:
15001 // These are the bad cases. We're not allowed to have a block
15002 // exit in these contexts.
15003 //
15004 // If we get here, then we're about to mark this block exit
15005 // as invalid. However, it could later _become_ valid if we
15006 // find a trailing while/until on the expression. In this
15007 // case instead of adding the error here, we'll add the
15008 // block exit to the list of exits for the expression, and
15009 // the node parsing will handle validating it instead.
15010 assert(parser->current_block_exits != NULL);
15011 pm_node_list_append(parser->current_block_exits, node);
15012 return;
15016 case PM_CONTEXT_BEGIN:
15017 case PM_CONTEXT_CASE_IN:
15022 case PM_CONTEXT_CLASS:
15024 case PM_CONTEXT_ELSE:
15025 case PM_CONTEXT_ELSIF:
15026 case PM_CONTEXT_EMBEXPR:
15028 case PM_CONTEXT_IF:
15032 case PM_CONTEXT_MODULE:
15034 case PM_CONTEXT_PARENS:
15037 case PM_CONTEXT_TERNARY:
15038 case PM_CONTEXT_UNLESS:
15039 // In these contexts we should continue walking up the list of
15040 // contexts.
15041 break;
15042 case PM_CONTEXT_NONE:
15043 // This case should never happen.
15044 assert(false && "unreachable");
15045 break;
15046 }
15047 }
15048}
15049
15054static pm_node_list_t *
15055push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15056 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15057 parser->current_block_exits = current_block_exits;
15058 return previous_block_exits;
15059}
15060
15066static void
15067flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15068 pm_node_t *block_exit;
15069 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15070 const char *type;
15071
15072 switch (PM_NODE_TYPE(block_exit)) {
15073 case PM_BREAK_NODE: type = "break"; break;
15074 case PM_NEXT_NODE: type = "next"; break;
15075 case PM_REDO_NODE: type = "redo"; break;
15076 default: assert(false && "unreachable"); type = ""; break;
15077 }
15078
15079 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15080 }
15081
15082 parser->current_block_exits = previous_block_exits;
15083}
15084
15089static void
15090pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15091 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15092 // If we matched a trailing while/until, then all of the block exits in
15093 // the contained list are valid. In this case we do not need to do
15094 // anything.
15095 parser->current_block_exits = previous_block_exits;
15096 } else if (previous_block_exits != NULL) {
15097 // If we did not matching a trailing while/until, then all of the block
15098 // exits contained in the list are invalid for this specific context.
15099 // However, they could still become valid in a higher level context if
15100 // there is another list above this one. In this case we'll push all of
15101 // the block exits up to the previous list.
15102 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15103 parser->current_block_exits = previous_block_exits;
15104 } else {
15105 // If we did not match a trailing while/until and this was the last
15106 // chance to do so, then all of the block exits in the list are invalid
15107 // and we need to add an error for each of them.
15108 flush_block_exits(parser, previous_block_exits);
15109 }
15110}
15111
15112static inline pm_node_t *
15113parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15114 context_push(parser, PM_CONTEXT_PREDICATE);
15115 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15116 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15117
15118 // Predicates are closed by a term, a "then", or a term and then a "then".
15119 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15120
15121 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15122 predicate_closed = true;
15123 *then_keyword = parser->previous;
15124 }
15125
15126 if (!predicate_closed) {
15127 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15128 }
15129
15130 context_pop(parser);
15131 return predicate;
15132}
15133
15134static inline pm_node_t *
15135parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15136 pm_node_list_t current_block_exits = { 0 };
15137 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15138
15139 pm_token_t keyword = parser->previous;
15140 pm_token_t then_keyword = not_provided(parser);
15141
15142 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15143 pm_statements_node_t *statements = NULL;
15144
15145 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15146 pm_accepts_block_stack_push(parser, true);
15147 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15148 pm_accepts_block_stack_pop(parser);
15149 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15150 }
15151
15152 pm_token_t end_keyword = not_provided(parser);
15153 pm_node_t *parent = NULL;
15154
15155 switch (context) {
15156 case PM_CONTEXT_IF:
15157 parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15158 break;
15159 case PM_CONTEXT_UNLESS:
15160 parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
15161 break;
15162 default:
15163 assert(false && "unreachable");
15164 break;
15165 }
15166
15167 pm_node_t *current = parent;
15168
15169 // Parse any number of elsif clauses. This will form a linked list of if
15170 // nodes pointing to each other from the top.
15171 if (context == PM_CONTEXT_IF) {
15172 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15173 if (parser_end_of_line_p(parser)) {
15174 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15175 }
15176
15177 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15178 pm_token_t elsif_keyword = parser->current;
15179 parser_lex(parser);
15180
15181 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15182 pm_accepts_block_stack_push(parser, true);
15183
15184 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15185 pm_accepts_block_stack_pop(parser);
15186 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15187
15188 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
15189 ((pm_if_node_t *) current)->subsequent = elsif;
15190 current = elsif;
15191 }
15192 }
15193
15194 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15195 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15196 opening_newline_index = token_newline_index(parser);
15197
15198 parser_lex(parser);
15199 pm_token_t else_keyword = parser->previous;
15200
15201 pm_accepts_block_stack_push(parser, true);
15202 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15203 pm_accepts_block_stack_pop(parser);
15204
15205 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15206 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15207 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15208
15209 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15210
15211 switch (context) {
15212 case PM_CONTEXT_IF:
15213 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15214 break;
15215 case PM_CONTEXT_UNLESS:
15216 ((pm_unless_node_t *) parent)->else_clause = else_node;
15217 break;
15218 default:
15219 assert(false && "unreachable");
15220 break;
15221 }
15222 } else {
15223 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15224 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15225 }
15226
15227 // Set the appropriate end location for all of the nodes in the subtree.
15228 switch (context) {
15229 case PM_CONTEXT_IF: {
15230 pm_node_t *current = parent;
15231 bool recursing = true;
15232
15233 while (recursing) {
15234 switch (PM_NODE_TYPE(current)) {
15235 case PM_IF_NODE:
15236 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15237 current = ((pm_if_node_t *) current)->subsequent;
15238 recursing = current != NULL;
15239 break;
15240 case PM_ELSE_NODE:
15241 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15242 recursing = false;
15243 break;
15244 default: {
15245 recursing = false;
15246 break;
15247 }
15248 }
15249 }
15250 break;
15251 }
15252 case PM_CONTEXT_UNLESS:
15253 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15254 break;
15255 default:
15256 assert(false && "unreachable");
15257 break;
15258 }
15259
15260 pop_block_exits(parser, previous_block_exits);
15261 pm_node_list_free(&current_block_exits);
15262
15263 return parent;
15264}
15265
15270#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15271 case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
15272 case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15273 case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15274 case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
15275 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
15276 case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
15277 case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
15278 case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
15279 case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
15280 case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15281
15286#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
15287 case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
15288 case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
15289 case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
15290 case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
15291 case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
15292 case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
15293 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15294
15300#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15301 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15302 case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15303 case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15304 case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15305 case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15306 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15307 case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15308 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15309
15314#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15315 case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15316 case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
15317 case PM_TOKEN_CLASS_VARIABLE
15318
15323#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15324 case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15325 case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15326 case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15327
15328// Assert here that the flags are the same so that we can safely switch the type
15329// of the node without having to move the flags.
15330PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15331
15336static inline pm_node_flags_t
15337parse_unescaped_encoding(const pm_parser_t *parser) {
15338 if (parser->explicit_encoding != NULL) {
15340 // If the there's an explicit encoding and it's using a UTF-8 escape
15341 // sequence, then mark the string as UTF-8.
15342 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15343 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15344 // If there's a non-UTF-8 escape sequence being used, then the
15345 // string uses the source encoding, unless the source is marked as
15346 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15347 // order to keep the string valid.
15348 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15349 }
15350 }
15351 return 0;
15352}
15353
15358static pm_node_t *
15359parse_string_part(pm_parser_t *parser, uint16_t depth) {
15360 switch (parser->current.type) {
15361 // Here the lexer has returned to us plain string content. In this case
15362 // we'll create a string node that has no opening or closing and return that
15363 // as the part. These kinds of parts look like:
15364 //
15365 // "aaa #{bbb} #@ccc ddd"
15366 // ^^^^ ^ ^^^^
15367 case PM_TOKEN_STRING_CONTENT: {
15368 pm_token_t opening = not_provided(parser);
15369 pm_token_t closing = not_provided(parser);
15370
15371 pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
15372 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15373
15374 parser_lex(parser);
15375 return node;
15376 }
15377 // Here the lexer has returned the beginning of an embedded expression. In
15378 // that case we'll parse the inner statements and return that as the part.
15379 // These kinds of parts look like:
15380 //
15381 // "aaa #{bbb} #@ccc ddd"
15382 // ^^^^^^
15383 case PM_TOKEN_EMBEXPR_BEGIN: {
15384 // Ruby disallows seeing encoding around interpolation in strings,
15385 // even though it is known at parse time.
15386 parser->explicit_encoding = NULL;
15387
15388 pm_lex_state_t state = parser->lex_state;
15389 int brace_nesting = parser->brace_nesting;
15390
15391 parser->brace_nesting = 0;
15392 lex_state_set(parser, PM_LEX_STATE_BEG);
15393 parser_lex(parser);
15394
15395 pm_token_t opening = parser->previous;
15396 pm_statements_node_t *statements = NULL;
15397
15398 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
15399 pm_accepts_block_stack_push(parser, true);
15400 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15401 pm_accepts_block_stack_pop(parser);
15402 }
15403
15404 parser->brace_nesting = brace_nesting;
15405 lex_state_set(parser, state);
15406
15407 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15408 pm_token_t closing = parser->previous;
15409
15410 // If this set of embedded statements only contains a single
15411 // statement, then Ruby does not consider it as a possible statement
15412 // that could emit a line event.
15413 if (statements != NULL && statements->body.size == 1) {
15414 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15415 }
15416
15417 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
15418 }
15419
15420 // Here the lexer has returned the beginning of an embedded variable.
15421 // In that case we'll parse the variable and create an appropriate node
15422 // for it and then return that node. These kinds of parts look like:
15423 //
15424 // "aaa #{bbb} #@ccc ddd"
15425 // ^^^^^
15426 case PM_TOKEN_EMBVAR: {
15427 // Ruby disallows seeing encoding around interpolation in strings,
15428 // even though it is known at parse time.
15429 parser->explicit_encoding = NULL;
15430
15431 lex_state_set(parser, PM_LEX_STATE_BEG);
15432 parser_lex(parser);
15433
15434 pm_token_t operator = parser->previous;
15435 pm_node_t *variable;
15436
15437 switch (parser->current.type) {
15438 // In this case a back reference is being interpolated. We'll
15439 // create a global variable read node.
15440 case PM_TOKEN_BACK_REFERENCE:
15441 parser_lex(parser);
15442 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15443 break;
15444 // In this case an nth reference is being interpolated. We'll
15445 // create a global variable read node.
15446 case PM_TOKEN_NUMBERED_REFERENCE:
15447 parser_lex(parser);
15448 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15449 break;
15450 // In this case a global variable is being interpolated. We'll
15451 // create a global variable read node.
15452 case PM_TOKEN_GLOBAL_VARIABLE:
15453 parser_lex(parser);
15454 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15455 break;
15456 // In this case an instance variable is being interpolated.
15457 // We'll create an instance variable read node.
15458 case PM_TOKEN_INSTANCE_VARIABLE:
15459 parser_lex(parser);
15460 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15461 break;
15462 // In this case a class variable is being interpolated. We'll
15463 // create a class variable read node.
15464 case PM_TOKEN_CLASS_VARIABLE:
15465 parser_lex(parser);
15466 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15467 break;
15468 // We can hit here if we got an invalid token. In that case
15469 // we'll not attempt to lex this token and instead just return a
15470 // missing node.
15471 default:
15472 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15473 variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15474 break;
15475 }
15476
15477 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15478 }
15479 default:
15480 parser_lex(parser);
15481 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15482 return NULL;
15483 }
15484}
15485
15491static const uint8_t *
15492parse_operator_symbol_name(const pm_token_t *name) {
15493 switch (name->type) {
15494 case PM_TOKEN_TILDE:
15495 case PM_TOKEN_BANG:
15496 if (name->end[-1] == '@') return name->end - 1;
15498 default:
15499 return name->end;
15500 }
15501}
15502
15503static pm_node_t *
15504parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15505 pm_token_t closing = not_provided(parser);
15506 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
15507
15508 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15509
15510 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15511 parser_lex(parser);
15512
15513 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15514 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15515
15516 return UP(symbol);
15517}
15518
15524static pm_node_t *
15525parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15526 const pm_token_t opening = parser->previous;
15527
15528 if (lex_mode->mode != PM_LEX_STRING) {
15529 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15530
15531 switch (parser->current.type) {
15532 case PM_CASE_OPERATOR:
15533 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15534 case PM_TOKEN_IDENTIFIER:
15535 case PM_TOKEN_CONSTANT:
15536 case PM_TOKEN_INSTANCE_VARIABLE:
15537 case PM_TOKEN_METHOD_NAME:
15538 case PM_TOKEN_CLASS_VARIABLE:
15539 case PM_TOKEN_GLOBAL_VARIABLE:
15540 case PM_TOKEN_NUMBERED_REFERENCE:
15541 case PM_TOKEN_BACK_REFERENCE:
15542 case PM_CASE_KEYWORD:
15543 parser_lex(parser);
15544 break;
15545 default:
15546 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15547 break;
15548 }
15549
15550 pm_token_t closing = not_provided(parser);
15551 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15552
15553 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15554 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15555
15556 return UP(symbol);
15557 }
15558
15559 if (lex_mode->as.string.interpolation) {
15560 // If we have the end of the symbol, then we can return an empty symbol.
15561 if (match1(parser, PM_TOKEN_STRING_END)) {
15562 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15563 parser_lex(parser);
15564
15565 pm_token_t content = not_provided(parser);
15566 pm_token_t closing = parser->previous;
15567 return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
15568 }
15569
15570 // Now we can parse the first part of the symbol.
15571 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15572
15573 // If we got a string part, then it's possible that we could transform
15574 // what looks like an interpolated symbol into a regular symbol.
15575 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15576 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15577 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15578
15579 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15580 }
15581
15582 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15583 if (part) pm_interpolated_symbol_node_append(symbol, part);
15584
15585 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15586 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15587 pm_interpolated_symbol_node_append(symbol, part);
15588 }
15589 }
15590
15591 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15592 if (match1(parser, PM_TOKEN_EOF)) {
15593 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15594 } else {
15595 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15596 }
15597
15598 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15599 return UP(symbol);
15600 }
15601
15602 pm_token_t content;
15603 pm_string_t unescaped;
15604
15605 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15606 content = parser->current;
15607 unescaped = parser->current_string;
15608 parser_lex(parser);
15609
15610 // If we have two string contents in a row, then the content of this
15611 // symbol is split because of heredoc contents. This looks like:
15612 //
15613 // <<A; :'a
15614 // A
15615 // b'
15616 //
15617 // In this case, the best way we have to represent this is as an
15618 // interpolated string node, so that's what we'll do here.
15619 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15620 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15621 pm_token_t bounds = not_provided(parser);
15622
15623 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
15624 pm_interpolated_symbol_node_append(symbol, part);
15625
15626 part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
15627 pm_interpolated_symbol_node_append(symbol, part);
15628
15629 if (next_state != PM_LEX_STATE_NONE) {
15630 lex_state_set(parser, next_state);
15631 }
15632
15633 parser_lex(parser);
15634 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15635
15636 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
15637 return UP(symbol);
15638 }
15639 } else {
15640 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15641 pm_string_shared_init(&unescaped, content.start, content.end);
15642 }
15643
15644 if (next_state != PM_LEX_STATE_NONE) {
15645 lex_state_set(parser, next_state);
15646 }
15647
15648 if (match1(parser, PM_TOKEN_EOF)) {
15649 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15650 } else {
15651 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15652 }
15653
15654 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15655}
15656
15661static inline pm_node_t *
15662parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15663 switch (parser->current.type) {
15664 case PM_CASE_OPERATOR: {
15665 const pm_token_t opening = not_provided(parser);
15666 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
15667 }
15668 case PM_CASE_KEYWORD:
15669 case PM_TOKEN_CONSTANT:
15670 case PM_TOKEN_IDENTIFIER:
15671 case PM_TOKEN_METHOD_NAME: {
15672 parser_lex(parser);
15673
15674 pm_token_t opening = not_provided(parser);
15675 pm_token_t closing = not_provided(parser);
15676 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15677
15678 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15679 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15680
15681 return UP(symbol);
15682 }
15683 case PM_TOKEN_SYMBOL_BEGIN: {
15684 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15685 parser_lex(parser);
15686
15687 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15688 }
15689 default:
15690 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15691 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15692 }
15693}
15694
15701static inline pm_node_t *
15702parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15703 switch (parser->current.type) {
15704 case PM_CASE_OPERATOR: {
15705 const pm_token_t opening = not_provided(parser);
15706 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15707 }
15708 case PM_CASE_KEYWORD:
15709 case PM_TOKEN_CONSTANT:
15710 case PM_TOKEN_IDENTIFIER:
15711 case PM_TOKEN_METHOD_NAME: {
15712 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15713 parser_lex(parser);
15714
15715 pm_token_t opening = not_provided(parser);
15716 pm_token_t closing = not_provided(parser);
15717 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15718
15719 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15720 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15721
15722 return UP(symbol);
15723 }
15724 case PM_TOKEN_SYMBOL_BEGIN: {
15725 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15726 parser_lex(parser);
15727
15728 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15729 }
15730 case PM_TOKEN_BACK_REFERENCE:
15731 parser_lex(parser);
15732 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15733 case PM_TOKEN_NUMBERED_REFERENCE:
15734 parser_lex(parser);
15735 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15736 case PM_TOKEN_GLOBAL_VARIABLE:
15737 parser_lex(parser);
15738 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15739 default:
15740 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15741 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
15742 }
15743}
15744
15749static pm_node_t *
15750parse_variable(pm_parser_t *parser) {
15751 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15752 int depth;
15753 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
15754
15755 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15756 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15757 }
15758
15759 pm_scope_t *current_scope = parser->current_scope;
15760 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15761 if (is_numbered_param) {
15762 // When you use a numbered parameter, it implies the existence of
15763 // all of the locals that exist before it. For example, referencing
15764 // _2 means that _1 must exist. Therefore here we loop through all
15765 // of the possibilities and add them into the constant pool.
15766 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15767 for (uint8_t number = 1; number <= maximum; number++) {
15768 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15769 }
15770
15771 if (!match1(parser, PM_TOKEN_EQUAL)) {
15772 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15773 }
15774
15775 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15776 pm_node_list_append(&current_scope->implicit_parameters, node);
15777
15778 return node;
15779 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15780 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15781 pm_node_list_append(&current_scope->implicit_parameters, node);
15782
15783 return node;
15784 }
15785 }
15786
15787 return NULL;
15788}
15789
15793static pm_node_t *
15794parse_variable_call(pm_parser_t *parser) {
15795 pm_node_flags_t flags = 0;
15796
15797 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15798 pm_node_t *node = parse_variable(parser);
15799 if (node != NULL) return node;
15800 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15801 }
15802
15803 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15804 pm_node_flag_set(UP(node), flags);
15805
15806 return UP(node);
15807}
15808
15814static inline pm_token_t
15815parse_method_definition_name(pm_parser_t *parser) {
15816 switch (parser->current.type) {
15817 case PM_CASE_KEYWORD:
15818 case PM_TOKEN_CONSTANT:
15819 case PM_TOKEN_METHOD_NAME:
15820 parser_lex(parser);
15821 return parser->previous;
15822 case PM_TOKEN_IDENTIFIER:
15823 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
15824 parser_lex(parser);
15825 return parser->previous;
15826 case PM_CASE_OPERATOR:
15827 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15828 parser_lex(parser);
15829 return parser->previous;
15830 default:
15831 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15832 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
15833 }
15834}
15835
15836static void
15837parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
15838 // Get a reference to the string struct that is being held by the string
15839 // node. This is the value we're going to actually manipulate.
15840 pm_string_ensure_owned(string);
15841
15842 // Now get the bounds of the existing string. We'll use this as a
15843 // destination to move bytes into. We'll also use it for bounds checking
15844 // since we don't require that these strings be null terminated.
15845 size_t dest_length = pm_string_length(string);
15846 const uint8_t *source_cursor = (uint8_t *) string->source;
15847 const uint8_t *source_end = source_cursor + dest_length;
15848
15849 // We're going to move bytes backward in the string when we get leading
15850 // whitespace, so we'll maintain a pointer to the current position in the
15851 // string that we're writing to.
15852 size_t trimmed_whitespace = 0;
15853
15854 // While we haven't reached the amount of common whitespace that we need to
15855 // trim and we haven't reached the end of the string, we'll keep trimming
15856 // whitespace. Trimming in this context means skipping over these bytes such
15857 // that they aren't copied into the new string.
15858 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15859 if (*source_cursor == '\t') {
15860 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15861 if (trimmed_whitespace > common_whitespace) break;
15862 } else {
15863 trimmed_whitespace++;
15864 }
15865
15866 source_cursor++;
15867 dest_length--;
15868 }
15869
15870 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
15871 string->length = dest_length;
15872}
15873
15877static void
15878parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15879 // The next node should be dedented if it's the first node in the list or if
15880 // it follows a string node.
15881 bool dedent_next = true;
15882
15883 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15884 // keep around two indices: a read and a write. If we end up trimming all of
15885 // the whitespace from a node, then we'll drop it from the list entirely.
15886 size_t write_index = 0;
15887
15888 pm_node_t *node;
15889 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15890 // We're not manipulating child nodes that aren't strings. In this case
15891 // we'll skip past it and indicate that the subsequent node should not
15892 // be dedented.
15893 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15894 nodes->nodes[write_index++] = node;
15895 dedent_next = false;
15896 continue;
15897 }
15898
15899 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15900 if (dedent_next) {
15901 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
15902 }
15903
15904 if (string_node->unescaped.length == 0) {
15905 pm_node_destroy(parser, node);
15906 } else {
15907 nodes->nodes[write_index++] = node;
15908 }
15909
15910 // We always dedent the next node if it follows a string node.
15911 dedent_next = true;
15912 }
15913
15914 nodes->size = write_index;
15915}
15916
15920static pm_token_t
15921parse_strings_empty_content(const uint8_t *location) {
15922 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15923}
15924
/**
 * Parse one or more adjacent string literals, beginning at the current
 * PM_TOKEN_STRING_BEGIN token, and return the resulting node.
 *
 * Each literal becomes a string node, a symbol node (when it is closed by a
 * PM_TOKEN_LABEL_END token), or the interpolated variant of either. Once there
 * is more than one piece to combine (a non-NULL `current` passed in by the
 * caller, or several literals in a row), the pieces are appended to an
 * interpolated string node that acts as a container for the concatenation.
 *
 * A label is only accepted when both `accepts_label` and the lex mode's
 * label_allowed flag are true; otherwise PM_ERR_UNEXPECTED_LABEL is reported
 * on the symbol. `depth` is forwarded, incremented by one, to
 * parse_string_part.
 *
 * If `current` is NULL and the first literal turns out to be a symbol or an
 * interpolated symbol, that node is returned immediately without looking for
 * further literals.
 */
15928static inline pm_node_t *
15929parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15930 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
    // Becomes true once `current` has been wrapped in the interpolated string
    // container used for concatenation.
15931 bool concating = false;
15932
15933 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15934 pm_node_t *node = NULL;
15935
15936 // Here we have found a string literal. We'll parse it and add it to
15937 // the list of strings.
15938 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15939 assert(lex_mode->mode == PM_LEX_STRING);
15940 bool lex_interpolation = lex_mode->as.string.interpolation;
15941 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15942
        // Remember the opening delimiter before lexing past it.
15943 pm_token_t opening = parser->current;
15944 parser_lex(parser);
15945
15946 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15947 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15948 // If we get here, then we have an end immediately after a
15949 // start. In that case we'll create an empty content token and
15950 // return an uninterpolated string.
15951 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15952 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15953
15954 pm_string_shared_init(&string->unescaped, content.start, content.end);
15955 node = UP(string);
15956 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15957 // If we get here, then we have an end of a label immediately
15958 // after a start. In that case we'll create an empty symbol
15959 // node.
15960 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15961 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
15962
15963 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
15964 node = UP(symbol);
15965
15966 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15967 } else if (!lex_interpolation) {
15968 // If we don't accept interpolation then we expect the string to
15969 // start with a single string content node.
15970 pm_string_t unescaped;
15971 pm_token_t content;
15972
15973 if (match1(parser, PM_TOKEN_EOF)) {
15974 unescaped = PM_STRING_EMPTY;
15975 content = not_provided(parser);
15976 } else {
                // Capture the unescaped content before expect1 runs.
15977 unescaped = parser->current_string;
15978 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
15979 content = parser->previous;
15980 }
15981
15982 // It is unfortunately possible to have multiple string content
15983 // nodes in a row in the case that there's heredoc content in
15984 // the middle of the string, like this cursed example:
15985 //
15986 // <<-END+'b
15987 // a
15988 // END
15989 // c'+'d'
15990 //
15991 // In that case we need to switch to an interpolated string to
15992 // be able to contain all of the parts.
15993 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15994 pm_node_list_t parts = { 0 };
15995
15996 pm_token_t delimiters = not_provided(parser);
15997 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
15998 pm_node_list_append(&parts, part);
15999
16000 do {
16001 part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
16002 pm_node_list_append(&parts, part);
16003 parser_lex(parser);
16004 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16005
16006 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16007 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16008
16009 pm_node_list_free(&parts);
16010 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16011 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16012 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16013 } else if (match1(parser, PM_TOKEN_EOF)) {
16014 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16015 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16016 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16017 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16018 } else {
                // Some other token sits where the terminator should be.
                // Report it, then turn the previous token into a zero-width
                // PM_TOKEN_MISSING token and use that as the closing.
16019 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16020 parser->previous.start = parser->previous.end;
16021 parser->previous.type = PM_TOKEN_MISSING;
16022 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16023 }
16024 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16025 // In this case we've hit string content so we know the string
16026 // at least has something in it. We'll need to check if the
16027 // following token is the end (in which case we can return a
16028 // plain string) or if it's not then it has interpolation.
16029 pm_token_t content = parser->current;
16030 pm_string_t unescaped = parser->current_string;
16031 parser_lex(parser);
16032
16033 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16034 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16035 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16036
16037 // Kind of odd behavior, but basically if we have an
16038 // unterminated string and it ends in a newline, we back up one
16039 // character so that the error message is on the last line of
16040 // content in the string.
16041 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16042 const uint8_t *location = parser->previous.end;
16043 if (location > parser->start && location[-1] == '\n') location--;
16044 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16045
16046 parser->previous.start = parser->previous.end;
16047 parser->previous.type = PM_TOKEN_MISSING;
16048 }
16049 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16050 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16051 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16052 } else {
16053 // If we get here, then we have interpolation so we'll need
16054 // to create a string or symbol node with interpolation.
16055 pm_node_list_t parts = { 0 };
16056 pm_token_t string_opening = not_provided(parser);
16057 pm_token_t string_closing = not_provided(parser);
16058
16059 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped));
16060 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16061 pm_node_list_append(&parts, part);
16062
16063 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16064 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16065 pm_node_list_append(&parts, part);
16066 }
16067 }
16068
16069 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16070 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16071 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16072 } else if (match1(parser, PM_TOKEN_EOF)) {
16073 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16074 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16075 } else {
16076 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16077 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16078 }
16079
16080 pm_node_list_free(&parts);
16081 }
16082 } else {
16083 // If we get here, then the first part of the string is not plain
16084 // string content, in which case we need to parse the string as an
16085 // interpolated string.
16086 pm_node_list_t parts = { 0 };
16087 pm_node_t *part;
16088
16089 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16090 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16091 pm_node_list_append(&parts, part);
16092 }
16093 }
16094
16095 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16096 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16097 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16098 } else if (match1(parser, PM_TOKEN_EOF)) {
16099 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16100 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16101 } else {
16102 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16103 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16104 }
16105
16106 pm_node_list_free(&parts);
16107 }
16108
16109 if (current == NULL) {
16110 // If the node we just parsed is a symbol node, then we can't
16111 // concatenate it with anything else, so we can now return that
16112 // node.
16113 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16114 return node;
16115 }
16116
16117 // If we don't already have a node, then it's fine and we can just
16118 // set the result to be the node we just parsed.
16119 current = node;
16120 } else {
16121 // Otherwise we need to check the type of the node we just parsed.
16122 // If it cannot be concatenated with the previous node, then we'll
16123 // need to add a syntax error.
16124 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16125 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16126 }
16127
16128 // If we haven't already created our container for concatenation,
16129 // we'll do that now.
16130 if (!concating) {
16131 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16132 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16133 }
16134
16135 concating = true;
16136 pm_token_t bounds = not_provided(parser);
16137
16138 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16139 pm_interpolated_string_node_append(container, current);
16140 current = UP(container);
16141 }
16142
16143 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16144 }
16145 }
16146
16147 return current;
16148}
16149
// Values for the `flags` argument of parse_pattern. Callers in this file pass
// them individually or combine PM_PARSE_PATTERN_TOP and PM_PARSE_PATTERN_MULTI
// with a bitwise OR; PM_PARSE_PATTERN_SINGLE is 0, i.e. no bits set.
// NOTE(review): what each flag enables is decided inside parse_pattern itself;
// confirm there before relying on the names alone.
16150#define PM_PARSE_PATTERN_SINGLE 0
16151#define PM_PARSE_PATTERN_TOP 1
16152#define PM_PARSE_PATTERN_MULTI 2
16153
// Forward declaration, needed because the pattern helpers that follow call
// parse_pattern.
16154static pm_node_t *
16155parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16156
16162static void
16163parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16164 // Skip this capture if it starts with an underscore.
16165 if (peek_at(parser, location->start) == '_') return;
16166
16167 if (pm_constant_id_list_includes(captures, capture)) {
16168 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16169 } else {
16170 pm_constant_id_list_append(captures, capture);
16171 }
16172}
16173
16177static pm_node_t *
16178parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16179 // Now, if there are any :: operators that follow, parse them as constant
16180 // path nodes.
16181 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16182 pm_token_t delimiter = parser->previous;
16183 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16184 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16185 }
16186
16187 // If there is a [ or ( that follows, then this is part of a larger pattern
16188 // expression. We'll parse the inner pattern here, then modify the returned
16189 // inner pattern with our constant path attached.
16190 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16191 return node;
16192 }
16193
16194 pm_token_t opening;
16195 pm_token_t closing;
16196 pm_node_t *inner = NULL;
16197
16198 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16199 opening = parser->previous;
16200 accept1(parser, PM_TOKEN_NEWLINE);
16201
16202 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16203 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16204 accept1(parser, PM_TOKEN_NEWLINE);
16205 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16206 }
16207
16208 closing = parser->previous;
16209 } else {
16210 parser_lex(parser);
16211 opening = parser->previous;
16212 accept1(parser, PM_TOKEN_NEWLINE);
16213
16214 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16215 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16216 accept1(parser, PM_TOKEN_NEWLINE);
16217 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16218 }
16219
16220 closing = parser->previous;
16221 }
16222
16223 if (!inner) {
16224 // If there was no inner pattern, then we have something like Foo() or
16225 // Foo[]. In that case we'll create an array pattern with no requireds.
16226 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16227 }
16228
16229 // Now that we have the inner pattern, check to see if it's an array, find,
16230 // or hash pattern. If it is, then we'll attach our constant path to it if
16231 // it doesn't already have a constant. If it's not one of those node types
16232 // or it does have a constant, then we'll create an array pattern.
16233 switch (PM_NODE_TYPE(inner)) {
16234 case PM_ARRAY_PATTERN_NODE: {
16235 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16236
16237 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16238 pattern_node->base.location.start = node->location.start;
16239 pattern_node->base.location.end = closing.end;
16240
16241 pattern_node->constant = node;
16242 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16243 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16244
16245 return UP(pattern_node);
16246 }
16247
16248 break;
16249 }
16250 case PM_FIND_PATTERN_NODE: {
16251 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16252
16253 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16254 pattern_node->base.location.start = node->location.start;
16255 pattern_node->base.location.end = closing.end;
16256
16257 pattern_node->constant = node;
16258 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16259 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16260
16261 return UP(pattern_node);
16262 }
16263
16264 break;
16265 }
16266 case PM_HASH_PATTERN_NODE: {
16267 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16268
16269 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16270 pattern_node->base.location.start = node->location.start;
16271 pattern_node->base.location.end = closing.end;
16272
16273 pattern_node->constant = node;
16274 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16275 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16276
16277 return UP(pattern_node);
16278 }
16279
16280 break;
16281 }
16282 default:
16283 break;
16284 }
16285
16286 // If we got here, then we didn't return one of the inner patterns by
16287 // attaching its constant. In this case we'll create an array pattern and
16288 // attach our constant to it.
16289 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16290 pm_array_pattern_node_requireds_append(pattern_node, inner);
16291 return UP(pattern_node);
16292}
16293
16297static pm_splat_node_t *
16298parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16299 assert(parser->previous.type == PM_TOKEN_USTAR);
16300 pm_token_t operator = parser->previous;
16301 pm_node_t *name = NULL;
16302
16303 // Rest patterns don't necessarily have a name associated with them. So we
16304 // will check for that here. If they do, then we'll add it to the local
16305 // table since this pattern will cause it to become a local variable.
16306 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16307 pm_token_t identifier = parser->previous;
16308 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16309
16310 int depth;
16311 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16312 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16313 }
16314
16315 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16316 name = UP(pm_local_variable_target_node_create(
16317 parser,
16318 &PM_LOCATION_TOKEN_VALUE(&identifier),
16319 constant_id,
16320 (uint32_t) (depth == -1 ? 0 : depth)
16321 ));
16322 }
16323
16324 // Finally we can return the created node.
16325 return pm_splat_node_create(parser, &operator, name);
16326}
16327
16331static pm_node_t *
16332parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16333 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16334 parser_lex(parser);
16335
16336 pm_token_t operator = parser->previous;
16337 pm_node_t *value = NULL;
16338
16339 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16340 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16341 }
16342
16343 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16344 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16345
16346 int depth;
16347 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16348 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16349 }
16350
16351 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16352 value = UP(pm_local_variable_target_node_create(
16353 parser,
16354 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16355 constant_id,
16356 (uint32_t) (depth == -1 ? 0 : depth)
16357 ));
16358 }
16359
16360 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16361}
16362
16367static bool
16368pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16369 ptrdiff_t length = end - start;
16370 if (length == 0) return false;
16371
16372 // First ensure that it starts with a valid identifier starting character.
16373 size_t width = char_is_identifier_start(parser, start, end - start);
16374 if (width == 0) return false;
16375
16376 // Next, ensure that it's not an uppercase character.
16377 if (parser->encoding_changed) {
16378 if (parser->encoding->isupper_char(start, length)) return false;
16379 } else {
16380 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16381 }
16382
16383 // Next, iterate through all of the bytes of the string to ensure that they
16384 // are all valid identifier characters.
16385 const uint8_t *cursor = start + width;
16386 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16387 return cursor == end;
16388}
16389
16394static pm_node_t *
16395parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16396 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16397
16398 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
16399 int depth = -1;
16400
16401 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
16402 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16403 } else {
16404 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16405
16406 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16407 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16408 }
16409 }
16410
16411 if (depth == -1) {
16412 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
16413 }
16414
16415 parse_pattern_capture(parser, captures, constant_id, value_loc);
16416 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16417 parser,
16418 value_loc,
16419 constant_id,
16420 (uint32_t) (depth == -1 ? 0 : depth)
16421 );
16422
16423 return UP(pm_implicit_node_create(parser, UP(target)));
16424}
16425
16430static void
16431parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16432 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
16433 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16434 }
16435}
16436
16441parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16442 pm_node_list_t assocs = { 0 };
16443 pm_static_literals_t keys = { 0 };
16444 pm_node_t *rest = NULL;
16445
16446 switch (PM_NODE_TYPE(first_node)) {
16447 case PM_ASSOC_SPLAT_NODE:
16448 case PM_NO_KEYWORDS_PARAMETER_NODE:
16449 rest = first_node;
16450 break;
16451 case PM_SYMBOL_NODE: {
16452 if (pm_symbol_node_label_p(first_node)) {
16453 parse_pattern_hash_key(parser, &keys, first_node);
16454 pm_node_t *value;
16455
16456 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16457 // Otherwise, we will create an implicit local variable
16458 // target for the value.
16459 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16460 } else {
16461 // Here we have a value for the first assoc in the list, so
16462 // we will parse it now.
16463 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16464 }
16465
16466 pm_token_t operator = not_provided(parser);
16467 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16468
16469 pm_node_list_append(&assocs, assoc);
16470 break;
16471 }
16472 }
16474 default: {
16475 // If we get anything else, then this is an error. For this we'll
16476 // create a missing node for the value and create an assoc node for
16477 // the first node in the list.
16478 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16479 pm_parser_err_node(parser, first_node, diag_id);
16480
16481 pm_token_t operator = not_provided(parser);
16482 pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
16483 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
16484
16485 pm_node_list_append(&assocs, assoc);
16486 break;
16487 }
16488 }
16489
16490 // If there are any other assocs, then we'll parse them now.
16491 while (accept1(parser, PM_TOKEN_COMMA)) {
16492 // Here we need to break to support trailing commas.
16493 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16494 // Trailing commas are not allowed to follow a rest pattern.
16495 if (rest != NULL) {
16496 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16497 }
16498
16499 break;
16500 }
16501
16502 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16503 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16504
16505 if (rest == NULL) {
16506 rest = assoc;
16507 } else {
16508 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16509 pm_node_list_append(&assocs, assoc);
16510 }
16511 } else {
16512 pm_node_t *key;
16513
16514 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16515 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16516
16517 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16518 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16519 } else if (!pm_symbol_node_label_p(key)) {
16520 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16521 }
16522 } else {
16523 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16524 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16525 }
16526
16527 parse_pattern_hash_key(parser, &keys, key);
16528 pm_node_t *value = NULL;
16529
16530 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16531 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16532 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16533 } else {
16534 value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
16535 }
16536 } else {
16537 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16538 }
16539
16540 pm_token_t operator = not_provided(parser);
16541 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
16542
16543 if (rest != NULL) {
16544 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16545 }
16546
16547 pm_node_list_append(&assocs, assoc);
16548 }
16549 }
16550
16551 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16552 xfree(assocs.nodes);
16553
16554 pm_static_literals_free(&keys);
16555 return node;
16556}
16557
16561static pm_node_t *
16562parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16563 switch (parser->current.type) {
16564 case PM_TOKEN_IDENTIFIER:
16565 case PM_TOKEN_METHOD_NAME: {
16566 parser_lex(parser);
16567 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16568
16569 int depth;
16570 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16571 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16572 }
16573
16574 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16575 return UP(pm_local_variable_target_node_create(
16576 parser,
16577 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16578 constant_id,
16579 (uint32_t) (depth == -1 ? 0 : depth)
16580 ));
16581 }
16582 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16583 pm_token_t opening = parser->current;
16584 parser_lex(parser);
16585
16586 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16587 // If we have an empty array pattern, then we'll just return a new
16588 // array pattern node.
16589 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16590 }
16591
16592 // Otherwise, we'll parse the inner pattern, then deal with it depending
16593 // on the type it returns.
16594 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16595
16596 accept1(parser, PM_TOKEN_NEWLINE);
16597 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16598 pm_token_t closing = parser->previous;
16599
16600 switch (PM_NODE_TYPE(inner)) {
16601 case PM_ARRAY_PATTERN_NODE: {
16602 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16603 if (pattern_node->opening_loc.start == NULL) {
16604 pattern_node->base.location.start = opening.start;
16605 pattern_node->base.location.end = closing.end;
16606
16607 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16608 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16609
16610 return UP(pattern_node);
16611 }
16612
16613 break;
16614 }
16615 case PM_FIND_PATTERN_NODE: {
16616 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16617 if (pattern_node->opening_loc.start == NULL) {
16618 pattern_node->base.location.start = opening.start;
16619 pattern_node->base.location.end = closing.end;
16620
16621 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16622 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16623
16624 return UP(pattern_node);
16625 }
16626
16627 break;
16628 }
16629 default:
16630 break;
16631 }
16632
16633 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16634 pm_array_pattern_node_requireds_append(node, inner);
16635 return UP(node);
16636 }
16637 case PM_TOKEN_BRACE_LEFT: {
16638 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16639 parser->pattern_matching_newlines = false;
16640
16642 pm_token_t opening = parser->current;
16643 parser_lex(parser);
16644
16645 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16646 // If we have an empty hash pattern, then we'll just return a new hash
16647 // pattern node.
16648 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16649 } else {
16650 pm_node_t *first_node;
16651
16652 switch (parser->current.type) {
16653 case PM_TOKEN_LABEL:
16654 parser_lex(parser);
16655 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16656 break;
16657 case PM_TOKEN_USTAR_STAR:
16658 first_node = parse_pattern_keyword_rest(parser, captures);
16659 break;
16660 case PM_TOKEN_STRING_BEGIN:
16661 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16662 break;
16663 default: {
16664 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16665 parser_lex(parser);
16666
16667 first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
16668 break;
16669 }
16670 }
16671
16672 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16673
16674 accept1(parser, PM_TOKEN_NEWLINE);
16675 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
16676 pm_token_t closing = parser->previous;
16677
16678 node->base.location.start = opening.start;
16679 node->base.location.end = closing.end;
16680
16681 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16682 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16683 }
16684
16685 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16686 return UP(node);
16687 }
16688 case PM_TOKEN_UDOT_DOT:
16689 case PM_TOKEN_UDOT_DOT_DOT: {
16690 pm_token_t operator = parser->current;
16691 parser_lex(parser);
16692
16693 // Since we have a unary range operator, we need to parse the subsequent
16694 // expression as the right side of the range.
16695 switch (parser->current.type) {
16696 case PM_CASE_PRIMITIVE: {
16697 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16698 return UP(pm_range_node_create(parser, NULL, &operator, right));
16699 }
16700 default: {
16701 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16702 pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
16703 return UP(pm_range_node_create(parser, NULL, &operator, right));
16704 }
16705 }
16706 }
16707 case PM_CASE_PRIMITIVE: {
16708 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16709
16710 // If we found a label, we need to immediately return to the caller.
16711 if (pm_symbol_node_label_p(node)) return node;
16712
16713 // Call nodes (arithmetic operations) are not allowed in patterns
16714 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16715 pm_parser_err_node(parser, node, diag_id);
16716 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
16717
16718 pm_node_unreference(parser, node);
16719 pm_node_destroy(parser, node);
16720 return UP(missing_node);
16721 }
16722
16723 // Now that we have a primitive, we need to check if it's part of a range.
16724 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16725 pm_token_t operator = parser->previous;
16726
16727 // Now that we have the operator, we need to check if this is followed
16728 // by another expression. If it is, then we will create a full range
16729 // node. Otherwise, we'll create an endless range.
16730 switch (parser->current.type) {
16731 case PM_CASE_PRIMITIVE: {
16732 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16733 return UP(pm_range_node_create(parser, node, &operator, right));
16734 }
16735 default:
16736 return UP(pm_range_node_create(parser, node, &operator, NULL));
16737 }
16738 }
16739
16740 return node;
16741 }
16742 case PM_TOKEN_CARET: {
16743 parser_lex(parser);
16744 pm_token_t operator = parser->previous;
16745
16746 // At this point we have a pin operator. We need to check the subsequent
16747 // expression to determine if it's a variable or an expression.
16748 switch (parser->current.type) {
16749 case PM_TOKEN_IDENTIFIER: {
16750 parser_lex(parser);
16751 pm_node_t *variable = UP(parse_variable(parser));
16752
16753 if (variable == NULL) {
16754 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16755 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16756 }
16757
16758 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16759 }
16760 case PM_TOKEN_INSTANCE_VARIABLE: {
16761 parser_lex(parser);
16762 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16763
16764 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16765 }
16766 case PM_TOKEN_CLASS_VARIABLE: {
16767 parser_lex(parser);
16768 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16769
16770 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16771 }
16772 case PM_TOKEN_GLOBAL_VARIABLE: {
16773 parser_lex(parser);
16774 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16775
16776 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16777 }
16778 case PM_TOKEN_NUMBERED_REFERENCE: {
16779 parser_lex(parser);
16780 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16781
16782 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16783 }
16784 case PM_TOKEN_BACK_REFERENCE: {
16785 parser_lex(parser);
16786 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16787
16788 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16789 }
16790 case PM_TOKEN_PARENTHESIS_LEFT: {
16791 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16792 parser->pattern_matching_newlines = false;
16793
16794 pm_token_t lparen = parser->current;
16795 parser_lex(parser);
16796
16797 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16798 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16799
16800 accept1(parser, PM_TOKEN_NEWLINE);
16801 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16802 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16803 }
16804 default: {
16805 // If we get here, then we have a pin operator followed by something
16806 // not understood. We'll create a missing node and return that.
16807 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16808 pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
16809 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16810 }
16811 }
16812 }
16813 case PM_TOKEN_UCOLON_COLON: {
16814 pm_token_t delimiter = parser->current;
16815 parser_lex(parser);
16816
16817 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16818 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16819
16820 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16821 }
16822 case PM_TOKEN_CONSTANT: {
16823 pm_token_t constant = parser->current;
16824 parser_lex(parser);
16825
16826 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16827 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16828 }
16829 default:
16830 pm_parser_err_current(parser, diag_id);
16831 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16832 }
16833}
16834
16835static bool
16836parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16837 switch (PM_NODE_TYPE(node)) {
16838 case PM_LOCAL_VARIABLE_TARGET_NODE:
16839 pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16840 return false;
16841 default:
16842 return true;
16843 }
16844}
16845
16850static void
16851parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16852 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16853}
16854
16859static pm_node_t *
16860parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16861 pm_node_t *node = first_node;
16862 bool alternation = false;
16863
16864 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16865 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16866 parse_pattern_alternation_error(parser, node);
16867 }
16868
16869 switch (parser->current.type) {
16870 case PM_TOKEN_IDENTIFIER:
16871 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16872 case PM_TOKEN_BRACE_LEFT:
16873 case PM_TOKEN_CARET:
16874 case PM_TOKEN_CONSTANT:
16875 case PM_TOKEN_UCOLON_COLON:
16876 case PM_TOKEN_UDOT_DOT:
16877 case PM_TOKEN_UDOT_DOT_DOT:
16878 case PM_CASE_PRIMITIVE: {
16879 if (!alternation) {
16880 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16881 } else {
16882 pm_token_t operator = parser->previous;
16883 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16884
16885 if (captures->size) parse_pattern_alternation_error(parser, right);
16886 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16887 }
16888
16889 break;
16890 }
16891 case PM_TOKEN_PARENTHESIS_LEFT:
16892 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16893 pm_token_t operator = parser->previous;
16894 pm_token_t opening = parser->current;
16895 parser_lex(parser);
16896
16897 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16898 accept1(parser, PM_TOKEN_NEWLINE);
16899 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16900 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16901
16902 if (!alternation) {
16903 node = right;
16904 } else {
16905 if (captures->size) parse_pattern_alternation_error(parser, right);
16906 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16907 }
16908
16909 break;
16910 }
16911 default: {
16912 pm_parser_err_current(parser, diag_id);
16913 pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
16914
16915 if (!alternation) {
16916 node = right;
16917 } else {
16918 if (captures->size) parse_pattern_alternation_error(parser, right);
16919 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16920 }
16921
16922 break;
16923 }
16924 }
16925 }
16926
16927 // If we have an =>, then we are assigning this pattern to a variable.
16928 // In this case we should create an assignment node.
16929 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16930 pm_token_t operator = parser->previous;
16931 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16932
16933 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16934 int depth;
16935
16936 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16937 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16938 }
16939
16940 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16941 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16942 parser,
16943 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16944 constant_id,
16945 (uint32_t) (depth == -1 ? 0 : depth)
16946 );
16947
16948 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16949 }
16950
16951 return node;
16952}
16953
16957static pm_node_t *
16958parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16959 pm_node_t *node = NULL;
16960
16961 bool leading_rest = false;
16962 bool trailing_rest = false;
16963
16964 switch (parser->current.type) {
16965 case PM_TOKEN_LABEL: {
16966 parser_lex(parser);
16967 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16968 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16969
16970 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16971 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16972 }
16973
16974 return node;
16975 }
16976 case PM_TOKEN_USTAR_STAR: {
16977 node = parse_pattern_keyword_rest(parser, captures);
16978 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16979
16980 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16981 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16982 }
16983
16984 return node;
16985 }
16986 case PM_TOKEN_STRING_BEGIN: {
16987 // We need special handling for string beginnings because they could
16988 // be dynamic symbols leading to hash patterns.
16989 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16990
16991 if (pm_symbol_node_label_p(node)) {
16992 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16993
16994 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16995 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16996 }
16997
16998 return node;
16999 }
17000
17001 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17002 break;
17003 }
17004 case PM_TOKEN_USTAR: {
17005 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17006 parser_lex(parser);
17007 node = UP(parse_pattern_rest(parser, captures));
17008 leading_rest = true;
17009 break;
17010 }
17011 }
17013 default:
17014 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17015 break;
17016 }
17017
17018 // If we got a dynamic label symbol, then we need to treat it like the
17019 // beginning of a hash pattern.
17020 if (pm_symbol_node_label_p(node)) {
17021 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17022 }
17023
17024 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17025 // If we have a comma, then we are now parsing either an array pattern
17026 // or a find pattern. We need to parse all of the patterns, put them
17027 // into a big list, and then determine which type of node we have.
17028 pm_node_list_t nodes = { 0 };
17029 pm_node_list_append(&nodes, node);
17030
17031 // Gather up all of the patterns into the list.
17032 while (accept1(parser, PM_TOKEN_COMMA)) {
17033 // Break early here in case we have a trailing comma.
17034 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17035 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17036 pm_node_list_append(&nodes, node);
17037 trailing_rest = true;
17038 break;
17039 }
17040
17041 if (accept1(parser, PM_TOKEN_USTAR)) {
17042 node = UP(parse_pattern_rest(parser, captures));
17043
17044 // If we have already parsed a splat pattern, then this is an
17045 // error. We will continue to parse the rest of the patterns,
17046 // but we will indicate it as an error.
17047 if (trailing_rest) {
17048 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17049 }
17050
17051 trailing_rest = true;
17052 } else {
17053 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17054 }
17055
17056 pm_node_list_append(&nodes, node);
17057 }
17058
17059 // If the first pattern and the last pattern are rest patterns, then we
17060 // will call this a find pattern, regardless of how many rest patterns
17061 // are in between because we know we already added the appropriate
17062 // errors. Otherwise we will create an array pattern.
17063 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17064 node = UP(pm_find_pattern_node_create(parser, &nodes));
17065
17066 if (nodes.size == 2) {
17067 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17068 }
17069 } else {
17070 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17071
17072 if (leading_rest && trailing_rest) {
17073 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17074 }
17075 }
17076
17077 xfree(nodes.nodes);
17078 } else if (leading_rest) {
17079 // Otherwise, if we parsed a single splat pattern, then we know we have
17080 // an array pattern, so we can go ahead and create that node.
17081 node = UP(pm_array_pattern_node_rest_create(parser, node));
17082 }
17083
17084 return node;
17085}
17086
17092static inline void
17093parse_negative_numeric(pm_node_t *node) {
17094 switch (PM_NODE_TYPE(node)) {
17095 case PM_INTEGER_NODE: {
17096 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17097 cast->base.location.start--;
17098 cast->value.negative = true;
17099 break;
17100 }
17101 case PM_FLOAT_NODE: {
17102 pm_float_node_t *cast = (pm_float_node_t *) node;
17103 cast->base.location.start--;
17104 cast->value = -cast->value;
17105 break;
17106 }
17107 case PM_RATIONAL_NODE: {
17108 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17109 cast->base.location.start--;
17110 cast->numerator.negative = true;
17111 break;
17112 }
17113 case PM_IMAGINARY_NODE:
17114 node->location.start--;
17115 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17116 break;
17117 default:
17118 assert(false && "unreachable");
17119 break;
17120 }
17121}
17122
17128static void
17129pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17130 switch (diag_id) {
17131 case PM_ERR_HASH_KEY: {
17132 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17133 break;
17134 }
17135 case PM_ERR_HASH_VALUE:
17136 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17137 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17138 break;
17139 }
17140 case PM_ERR_UNARY_RECEIVER: {
17141 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17142 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17143 break;
17144 }
17145 case PM_ERR_UNARY_DISALLOWED:
17146 case PM_ERR_EXPECT_ARGUMENT: {
17147 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17148 break;
17149 }
17150 default:
17151 pm_parser_err_previous(parser, diag_id);
17152 break;
17153 }
17154}
17155
17159static void
17160parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17161#define CONTEXT_NONE 0
17162#define CONTEXT_THROUGH_ENSURE 1
17163#define CONTEXT_THROUGH_ELSE 2
17164
17165 pm_context_node_t *context_node = parser->current_context;
17166 int context = CONTEXT_NONE;
17167
17168 while (context_node != NULL) {
17169 switch (context_node->context) {
17177 case PM_CONTEXT_DEFINED:
17179 // These are the good cases. We're allowed to have a retry here.
17180 return;
17181 case PM_CONTEXT_CLASS:
17182 case PM_CONTEXT_DEF:
17184 case PM_CONTEXT_MAIN:
17185 case PM_CONTEXT_MODULE:
17186 case PM_CONTEXT_PREEXE:
17187 case PM_CONTEXT_SCLASS:
17188 // These are the bad cases. We're not allowed to have a retry in
17189 // these contexts.
17190 if (context == CONTEXT_NONE) {
17191 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17192 } else if (context == CONTEXT_THROUGH_ENSURE) {
17193 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17194 } else if (context == CONTEXT_THROUGH_ELSE) {
17195 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17196 }
17197 return;
17205 // These are also bad cases, but with a more specific error
17206 // message indicating the else.
17207 context = CONTEXT_THROUGH_ELSE;
17208 break;
17216 // These are also bad cases, but with a more specific error
17217 // message indicating the ensure.
17218 context = CONTEXT_THROUGH_ENSURE;
17219 break;
17220 case PM_CONTEXT_NONE:
17221 // This case should never happen.
17222 assert(false && "unreachable");
17223 break;
17224 case PM_CONTEXT_BEGIN:
17228 case PM_CONTEXT_CASE_IN:
17231 case PM_CONTEXT_ELSE:
17232 case PM_CONTEXT_ELSIF:
17233 case PM_CONTEXT_EMBEXPR:
17235 case PM_CONTEXT_FOR:
17236 case PM_CONTEXT_IF:
17241 case PM_CONTEXT_PARENS:
17242 case PM_CONTEXT_POSTEXE:
17244 case PM_CONTEXT_TERNARY:
17245 case PM_CONTEXT_UNLESS:
17246 case PM_CONTEXT_UNTIL:
17247 case PM_CONTEXT_WHILE:
17248 // In these contexts we should continue walking up the list of
17249 // contexts.
17250 break;
17251 }
17252
17253 context_node = context_node->prev;
17254 }
17255
17256#undef CONTEXT_NONE
17257#undef CONTEXT_ENSURE
17258#undef CONTEXT_ELSE
17259}
17260
17264static void
17265parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17266 pm_context_node_t *context_node = parser->current_context;
17267
17268 while (context_node != NULL) {
17269 switch (context_node->context) {
17270 case PM_CONTEXT_DEF:
17272 case PM_CONTEXT_DEFINED:
17276 // These are the good cases. We're allowed to have a block exit
17277 // in these contexts.
17278 return;
17279 case PM_CONTEXT_CLASS:
17283 case PM_CONTEXT_MAIN:
17284 case PM_CONTEXT_MODULE:
17288 case PM_CONTEXT_SCLASS:
17292 // These are the bad cases. We're not allowed to have a retry in
17293 // these contexts.
17294 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17295 return;
17296 case PM_CONTEXT_NONE:
17297 // This case should never happen.
17298 assert(false && "unreachable");
17299 break;
17300 case PM_CONTEXT_BEGIN:
17310 case PM_CONTEXT_CASE_IN:
17313 case PM_CONTEXT_ELSE:
17314 case PM_CONTEXT_ELSIF:
17315 case PM_CONTEXT_EMBEXPR:
17317 case PM_CONTEXT_FOR:
17318 case PM_CONTEXT_IF:
17326 case PM_CONTEXT_PARENS:
17327 case PM_CONTEXT_POSTEXE:
17329 case PM_CONTEXT_PREEXE:
17331 case PM_CONTEXT_TERNARY:
17332 case PM_CONTEXT_UNLESS:
17333 case PM_CONTEXT_UNTIL:
17334 case PM_CONTEXT_WHILE:
17335 // In these contexts we should continue walking up the list of
17336 // contexts.
17337 break;
17338 }
17339
17340 context_node = context_node->prev;
17341 }
17342}
17343
17348typedef struct {
17351
17353 const uint8_t *start;
17354
17356 const uint8_t *end;
17357
17366
17371static void
17372parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17374 pm_location_t location;
17375
17376 if (callback_data->shared) {
17377 location = (pm_location_t) { .start = start, .end = end };
17378 } else {
17379 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17380 }
17381
17382 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17383}
17384
17388static void
17389parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17390 const pm_string_t *unescaped = &node->unescaped;
17392 .parser = parser,
17393 .start = node->base.location.start,
17394 .end = node->base.location.end,
17395 .shared = unescaped->type == PM_STRING_SHARED
17396 };
17397
17398 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17399}
17400
17404static inline pm_node_t *
17405parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17406 switch (parser->current.type) {
17407 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17408 parser_lex(parser);
17409
17410 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17411 pm_accepts_block_stack_push(parser, true);
17412 bool parsed_bare_hash = false;
17413
17414 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17415 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17416
17417 // Handle the case where we don't have a comma and we have a
17418 // newline followed by a right bracket.
17419 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17420 break;
17421 }
17422
17423 // Ensure that we have a comma between elements in the array.
17424 if (array->elements.size > 0) {
17425 if (accept1(parser, PM_TOKEN_COMMA)) {
17426 // If there was a comma but we also accepts a newline,
17427 // then this is a syntax error.
17428 if (accepted_newline) {
17429 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17430 }
17431 } else {
17432 // If there was no comma, then we need to add a syntax
17433 // error.
17434 const uint8_t *location = parser->previous.end;
17435 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17436
17437 parser->previous.start = location;
17438 parser->previous.type = PM_TOKEN_MISSING;
17439 }
17440 }
17441
17442 // If we have a right bracket immediately following a comma,
17443 // this is allowed since it's a trailing comma. In this case we
17444 // can break out of the loop.
17445 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17446
17447 pm_node_t *element;
17448
17449 if (accept1(parser, PM_TOKEN_USTAR)) {
17450 pm_token_t operator = parser->previous;
17451 pm_node_t *expression = NULL;
17452
17453 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17454 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17455 } else {
17456 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17457 }
17458
17459 element = UP(pm_splat_node_create(parser, &operator, expression));
17460 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17461 if (parsed_bare_hash) {
17462 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17463 }
17464
17465 element = UP(pm_keyword_hash_node_create(parser));
17466 pm_static_literals_t hash_keys = { 0 };
17467
17468 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17469 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17470 }
17471
17472 pm_static_literals_free(&hash_keys);
17473 parsed_bare_hash = true;
17474 } else {
17475 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17476
17477 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17478 if (parsed_bare_hash) {
17479 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17480 }
17481
17482 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17483 pm_static_literals_t hash_keys = { 0 };
17484 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17485
17486 pm_token_t operator;
17487 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17488 operator = parser->previous;
17489 } else {
17490 operator = not_provided(parser);
17491 }
17492
17493 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17494 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
17495 pm_keyword_hash_node_elements_append(hash, assoc);
17496
17497 element = UP(hash);
17498 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17499 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17500 }
17501
17502 pm_static_literals_free(&hash_keys);
17503 parsed_bare_hash = true;
17504 }
17505 }
17506
17507 pm_array_node_elements_append(array, element);
17508 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17509 }
17510
17511 accept1(parser, PM_TOKEN_NEWLINE);
17512
17513 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17514 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17515 parser->previous.start = parser->previous.end;
17516 parser->previous.type = PM_TOKEN_MISSING;
17517 }
17518
17519 pm_array_node_close_set(array, &parser->previous);
17520 pm_accepts_block_stack_pop(parser);
17521
17522 return UP(array);
17523 }
17524 case PM_TOKEN_PARENTHESIS_LEFT:
17525 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17526 pm_token_t opening = parser->current;
17527 pm_node_flags_t flags = 0;
17528
17529 pm_node_list_t current_block_exits = { 0 };
17530 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17531
17532 parser_lex(parser);
17533 while (true) {
17534 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17535 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17536 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17537 break;
17538 }
17539 }
17540
17541 // If this is the end of the file or we match a right parenthesis, then
17542 // we have an empty parentheses node, and we can immediately return.
17543 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17544 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17545
17546 pop_block_exits(parser, previous_block_exits);
17547 pm_node_list_free(&current_block_exits);
17548
17549 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17550 }
17551
17552 // Otherwise, we're going to parse the first statement in the list
17553 // of statements within the parentheses.
17554 pm_accepts_block_stack_push(parser, true);
17555 context_push(parser, PM_CONTEXT_PARENS);
17556 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17557 context_pop(parser);
17558
17559 // Determine if this statement is followed by a terminator. In the
17560 // case of a single statement, this is fine. But in the case of
17561 // multiple statements it's required.
17562 bool terminator_found = false;
17563
17564 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17565 terminator_found = true;
17566 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17567 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17568 terminator_found = true;
17569 }
17570
17571 if (terminator_found) {
17572 while (true) {
17573 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17574 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17575 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17576 break;
17577 }
17578 }
17579 }
17580
17581 // If we hit a right parenthesis, then we're done parsing the
17582 // parentheses node, and we can check which kind of node we should
17583 // return.
17584 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17585 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17586 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17587 }
17588
17589 parser_lex(parser);
17590 pm_accepts_block_stack_pop(parser);
17591
17592 pop_block_exits(parser, previous_block_exits);
17593 pm_node_list_free(&current_block_exits);
17594
17595 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17596 // If we have a single statement and are ending on a right
17597 // parenthesis, then we need to check if this is possibly a
17598 // multiple target node.
17599 pm_multi_target_node_t *multi_target;
17600
17601 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
17602 multi_target = (pm_multi_target_node_t *) statement;
17603 } else {
17604 multi_target = pm_multi_target_node_create(parser);
17605 pm_multi_target_node_targets_append(parser, multi_target, statement);
17606 }
17607
17608 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17609 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
17610
17611 multi_target->lparen_loc = lparen_loc;
17612 multi_target->rparen_loc = rparen_loc;
17613 multi_target->base.location.start = lparen_loc.start;
17614 multi_target->base.location.end = rparen_loc.end;
17615
17616 pm_node_t *result;
17617 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17618 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17619 accept1(parser, PM_TOKEN_NEWLINE);
17620 } else {
17621 result = UP(multi_target);
17622 }
17623
17624 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17625 // All set, this is explicitly allowed by the parent
17626 // context.
17627 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17628 // All set, we're inside a for loop and we're parsing
17629 // multiple targets.
17630 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17631 // Multi targets are not allowed when it's not a
17632 // statement level.
17633 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17634 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17635 // Multi targets must be followed by an equal sign in
17636 // order to be valid (or a right parenthesis if they are
17637 // nested).
17638 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17639 }
17640
17641 return result;
17642 }
17643
17644 // If we have a single statement and are ending on a right parenthesis
17645 // and we didn't return a multiple assignment node, then we can return a
17646 // regular parentheses node now.
17647 pm_statements_node_t *statements = pm_statements_node_create(parser);
17648 pm_statements_node_body_append(parser, statements, statement, true);
17649
17650 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17651 }
17652
17653 // If we have more than one statement in the set of parentheses,
17654 // then we are going to parse all of them as a list of statements.
17655 // We'll do that here.
17656 context_push(parser, PM_CONTEXT_PARENS);
17657 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17658
17659 pm_statements_node_t *statements = pm_statements_node_create(parser);
17660 pm_statements_node_body_append(parser, statements, statement, true);
17661
17662 // If we didn't find a terminator and we didn't find a right
17663 // parenthesis, then this is a syntax error.
17664 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17665 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17666 }
17667
17668 // Parse each statement within the parentheses.
17669 while (true) {
17670 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17671 pm_statements_node_body_append(parser, statements, node, true);
17672
17673 // If we're recovering from a syntax error, then we need to stop
17674 // parsing the statements now.
17675 if (parser->recovering) {
17676 // If this is the level of context where the recovery has
17677 // happened, then we can mark the parser as done recovering.
17678 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17679 break;
17680 }
17681
17682 // If we couldn't parse an expression at all, then we need to
17683 // bail out of the loop.
17684 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17685
17686 // If we successfully parsed a statement, then we are going to
17687 // need a terminator to delimit it from the next statement.
17688 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17689 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17690 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17691 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17692 break;
17693 } else if (!match1(parser, PM_TOKEN_EOF)) {
17694 // If we're at the end of the file, then we're going to add
17695 // an error after this for the ) anyway.
17696 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17697 }
17698 }
17699
17700 context_pop(parser);
17701 pm_accepts_block_stack_pop(parser);
17702 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17703
17704 // When we're parsing multi targets, we allow them to be followed by
17705 // a right parenthesis if they are at the statement level. This is
17706 // only possible if they are the final statement in a set of parentheses.
17707 // We need to explicitly reject that here.
17708 {
17709 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17710
17711 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17712 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17713 pm_multi_target_node_targets_append(parser, multi_target, statement);
17714
17715 statement = UP(multi_target);
17716 statements->body.nodes[statements->body.size - 1] = statement;
17717 }
17718
17719 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17720 const uint8_t *offset = statement->location.end;
17721 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17722 pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
17723
17724 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17725 statements->body.nodes[statements->body.size - 1] = statement;
17726
17727 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17728 }
17729 }
17730
17731 pop_block_exits(parser, previous_block_exits);
17732 pm_node_list_free(&current_block_exits);
17733
17734 pm_void_statements_check(parser, statements, true);
17735 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17736 }
17737 case PM_TOKEN_BRACE_LEFT: {
17738 // If we were passed a current_hash_keys via the parser, then that
17739 // means we're already parsing a hash and we want to share the set
17740 // of hash keys with this inner hash we're about to parse for the
17741 // sake of warnings. We'll set it to NULL after we grab it to make
17742 // sure subsequent expressions don't use it. Effectively this is a
17743 // way of getting around passing it to every call to
17744 // parse_expression.
17745 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17746 parser->current_hash_keys = NULL;
17747
17748 pm_accepts_block_stack_push(parser, true);
17749 parser_lex(parser);
17750
17751 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
17752
17753 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17754 if (current_hash_keys != NULL) {
17755 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17756 } else {
17757 pm_static_literals_t hash_keys = { 0 };
17758 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17759 pm_static_literals_free(&hash_keys);
17760 }
17761
17762 accept1(parser, PM_TOKEN_NEWLINE);
17763 }
17764
17765 pm_accepts_block_stack_pop(parser);
17766 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
17767 pm_hash_node_closing_loc_set(node, &parser->previous);
17768
17769 return UP(node);
17770 }
17771 case PM_TOKEN_CHARACTER_LITERAL: {
17772 pm_token_t closing = not_provided(parser);
17773 pm_node_t *node = UP(pm_string_node_create_current_string(
17774 parser,
17775 &(pm_token_t) {
17776 .type = PM_TOKEN_STRING_BEGIN,
17777 .start = parser->current.start,
17778 .end = parser->current.start + 1
17779 },
17780 &(pm_token_t) {
17781 .type = PM_TOKEN_STRING_CONTENT,
17782 .start = parser->current.start + 1,
17783 .end = parser->current.end
17784 },
17785 &closing
17786 ));
17787
17788 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17789
17790 // Skip past the character literal here, since now we have handled
17791 // parser->explicit_encoding correctly.
17792 parser_lex(parser);
17793
17794 // Characters can be followed by strings in which case they are
17795 // automatically concatenated.
17796 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17797 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17798 }
17799
17800 return node;
17801 }
17802 case PM_TOKEN_CLASS_VARIABLE: {
17803 parser_lex(parser);
17804 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17805
17806 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17807 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17808 }
17809
17810 return node;
17811 }
17812 case PM_TOKEN_CONSTANT: {
17813 parser_lex(parser);
17814 pm_token_t constant = parser->previous;
17815
17816 // If a constant is immediately followed by parentheses, then this is in
17817 // fact a method call, not a constant read.
17818 if (
17819 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17820 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17821 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17822 match1(parser, PM_TOKEN_BRACE_LEFT)
17823 ) {
17824 pm_arguments_t arguments = { 0 };
17825 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17826 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17827 }
17828
17829 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17830
17831 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17832 // If we get here, then we have a comma immediately following a
17833 // constant, so we're going to parse this as a multiple assignment.
17834 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17835 }
17836
17837 return node;
17838 }
17839 case PM_TOKEN_UCOLON_COLON: {
17840 parser_lex(parser);
17841 pm_token_t delimiter = parser->previous;
17842
17843 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17844 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17845
17846 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17847 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17848 }
17849
17850 return node;
17851 }
17852 case PM_TOKEN_UDOT_DOT:
17853 case PM_TOKEN_UDOT_DOT_DOT: {
17854 pm_token_t operator = parser->current;
17855 parser_lex(parser);
17856
17857 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17858
17859 // Unary .. and ... are special because these are non-associative
17860 // operators that can also be unary operators. In this case we need
17861 // to explicitly reject code that has a .. or ... that follows this
17862 // expression.
17863 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17864 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17865 }
17866
17867 return UP(pm_range_node_create(parser, NULL, &operator, right));
17868 }
17869 case PM_TOKEN_FLOAT:
17870 parser_lex(parser);
17871 return UP(pm_float_node_create(parser, &parser->previous));
17872 case PM_TOKEN_FLOAT_IMAGINARY:
17873 parser_lex(parser);
17874 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17875 case PM_TOKEN_FLOAT_RATIONAL:
17876 parser_lex(parser);
17877 return UP(pm_float_node_rational_create(parser, &parser->previous));
17878 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17879 parser_lex(parser);
17880 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17881 case PM_TOKEN_NUMBERED_REFERENCE: {
17882 parser_lex(parser);
17883 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17884
17885 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17886 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17887 }
17888
17889 return node;
17890 }
17891 case PM_TOKEN_GLOBAL_VARIABLE: {
17892 parser_lex(parser);
17893 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17894
17895 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17896 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17897 }
17898
17899 return node;
17900 }
17901 case PM_TOKEN_BACK_REFERENCE: {
17902 parser_lex(parser);
17903 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17904
17905 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17906 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17907 }
17908
17909 return node;
17910 }
17911 case PM_TOKEN_IDENTIFIER:
17912 case PM_TOKEN_METHOD_NAME: {
17913 parser_lex(parser);
17914 pm_token_t identifier = parser->previous;
17915 pm_node_t *node = parse_variable_call(parser);
17916
17917 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17918 // If parse_variable_call returned with a call node, then we
17919 // know the identifier is not in the local table. In that case
17920 // we need to check if there are arguments following the
17921 // identifier.
17922 pm_call_node_t *call = (pm_call_node_t *) node;
17923 pm_arguments_t arguments = { 0 };
17924
17925 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
17926 // Since we found arguments, we need to turn off the
17927 // variable call bit in the flags.
17928 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17929
17930 call->opening_loc = arguments.opening_loc;
17931 call->arguments = arguments.arguments;
17932 call->closing_loc = arguments.closing_loc;
17933 call->block = arguments.block;
17934
17935 const uint8_t *end = pm_arguments_end(&arguments);
17936 if (!end) {
17937 end = call->message_loc.end;
17938 }
17939 call->base.location.end = end;
17940 }
17941 } else {
17942 // Otherwise, we know the identifier is in the local table. This
17943 // can still be a method call if it is followed by arguments or
17944 // a block, so we need to check for that here.
17945 if (
17946 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17947 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17948 match1(parser, PM_TOKEN_BRACE_LEFT)
17949 ) {
17950 pm_arguments_t arguments = { 0 };
17951 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17952 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17953
17954 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17955 // If we're about to convert an 'it' implicit local
17956 // variable read into a method call, we need to remove
17957 // it from the list of implicit local variables.
17958 pm_node_unreference(parser, node);
17959 } else {
17960 // Otherwise, we're about to convert a regular local
17961 // variable read into a method call, in which case we
17962 // need to indicate that this was not a read for the
17963 // purposes of warnings.
17964 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17965
17966 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
17967 pm_node_unreference(parser, node);
17968 } else {
17970 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17971 }
17972 }
17973
17974 pm_node_destroy(parser, node);
17975 return UP(fcall);
17976 }
17977 }
17978
17979 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17980 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17981 }
17982
17983 return node;
17984 }
17985 case PM_TOKEN_HEREDOC_START: {
17986 // Here we have found a heredoc. We'll parse it and add it to the
17987 // list of strings.
17988 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
17989 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
17990
17991 size_t common_whitespace = (size_t) -1;
17992 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
17993
17994 parser_lex(parser);
17995 pm_token_t opening = parser->previous;
17996
17997 pm_node_t *node;
17998 pm_node_t *part;
17999
18000 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18001 // If we get here, then we have an empty heredoc. We'll create
18002 // an empty content token and return an empty string node.
18003 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18004 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18005
18006 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18007 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18008 } else {
18009 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18010 }
18011
18012 node->location.end = opening.end;
18013 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18014 // If we get here, then we tried to find something in the
18015 // heredoc but couldn't actually parse anything, so we'll just
18016 // return a missing node.
18017 //
18018 // parse_string_part handles its own errors, so there is no need
18019 // for us to add one here.
18020 node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18021 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18022 // If we get here, then the part that we parsed was plain string
18023 // content and we're at the end of the heredoc, so we can return
18024 // just a string node with the heredoc opening and closing as
18025 // its opening and closing.
18026 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18027 pm_string_node_t *cast = (pm_string_node_t *) part;
18028
18029 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18030 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18031 cast->base.location = cast->opening_loc;
18032
18033 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18034 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18035 cast->base.type = PM_X_STRING_NODE;
18036 }
18037
18038 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18039 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18040 }
18041
18042 node = UP(cast);
18043 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18044 } else {
18045 // If we get here, then we have multiple parts in the heredoc,
18046 // so we'll need to create an interpolated string node to hold
18047 // them all.
18048 pm_node_list_t parts = { 0 };
18049 pm_node_list_append(&parts, part);
18050
18051 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18052 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18053 pm_node_list_append(&parts, part);
18054 }
18055 }
18056
18057 // Now that we have all of the parts, create the correct type of
18058 // interpolated node.
18059 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18060 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18061 cast->parts = parts;
18062
18063 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18064 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18065
18066 cast->base.location = cast->opening_loc;
18067 node = UP(cast);
18068 } else {
18069 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18070 pm_node_list_free(&parts);
18071
18072 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18073 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18074
18075 cast->base.location = cast->opening_loc;
18076 node = UP(cast);
18077 }
18078
18079 // If this is a heredoc that is indented with a ~, then we need
18080 // to dedent each line by the common leading whitespace.
18081 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18082 pm_node_list_t *nodes;
18083 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18084 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18085 } else {
18086 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18087 }
18088
18089 parse_heredoc_dedent(parser, nodes, common_whitespace);
18090 }
18091 }
18092
18093 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18094 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18095 }
18096
18097 return node;
18098 }
18099 case PM_TOKEN_INSTANCE_VARIABLE: {
18100 parser_lex(parser);
18101 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18102
18103 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18104 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18105 }
18106
18107 return node;
18108 }
18109 case PM_TOKEN_INTEGER: {
18110 pm_node_flags_t base = parser->integer_base;
18111 parser_lex(parser);
18112 return UP(pm_integer_node_create(parser, base, &parser->previous));
18113 }
18114 case PM_TOKEN_INTEGER_IMAGINARY: {
18115 pm_node_flags_t base = parser->integer_base;
18116 parser_lex(parser);
18117 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18118 }
18119 case PM_TOKEN_INTEGER_RATIONAL: {
18120 pm_node_flags_t base = parser->integer_base;
18121 parser_lex(parser);
18122 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18123 }
18124 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18125 pm_node_flags_t base = parser->integer_base;
18126 parser_lex(parser);
18127 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18128 }
18129 case PM_TOKEN_KEYWORD___ENCODING__:
18130 parser_lex(parser);
18131 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18132 case PM_TOKEN_KEYWORD___FILE__:
18133 parser_lex(parser);
18134 return UP(pm_source_file_node_create(parser, &parser->previous));
18135 case PM_TOKEN_KEYWORD___LINE__:
18136 parser_lex(parser);
18137 return UP(pm_source_line_node_create(parser, &parser->previous));
18138 case PM_TOKEN_KEYWORD_ALIAS: {
18139 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18140 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18141 }
18142
18143 parser_lex(parser);
18144 pm_token_t keyword = parser->previous;
18145
18146 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18147 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18148
18149 switch (PM_NODE_TYPE(new_name)) {
18150 case PM_BACK_REFERENCE_READ_NODE:
18151 case PM_NUMBERED_REFERENCE_READ_NODE:
18152 case PM_GLOBAL_VARIABLE_READ_NODE: {
18153 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18154 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18155 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18156 }
18157 } else {
18158 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18159 }
18160
18161 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18162 }
18163 case PM_SYMBOL_NODE:
18164 case PM_INTERPOLATED_SYMBOL_NODE: {
18165 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18166 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18167 }
18168 }
18170 default:
18171 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18172 }
18173 }
18174 case PM_TOKEN_KEYWORD_CASE: {
18175 size_t opening_newline_index = token_newline_index(parser);
18176 parser_lex(parser);
18177
18178 pm_token_t case_keyword = parser->previous;
18179 pm_node_t *predicate = NULL;
18180
18181 pm_node_list_t current_block_exits = { 0 };
18182 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18183
18184 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18185 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18186 predicate = NULL;
18187 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18188 predicate = NULL;
18189 } else if (!token_begins_expression_p(parser->current.type)) {
18190 predicate = NULL;
18191 } else {
18192 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18193 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18194 }
18195
18196 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18197 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18198 parser_lex(parser);
18199
18200 pop_block_exits(parser, previous_block_exits);
18201 pm_node_list_free(&current_block_exits);
18202
18203 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18204 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18205 }
18206
18207 // At this point we can create a case node, though we don't yet know
18208 // if it is a case-in or case-when node.
18209 pm_token_t end_keyword = not_provided(parser);
18210 pm_node_t *node;
18211
18212 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18213 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18214 pm_static_literals_t literals = { 0 };
18215
18216 // At this point we've seen a when keyword, so we know this is a
18217 // case-when node. We will continue to parse the when nodes
18218 // until we hit the end of the list.
18219 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18220 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18221 parser_lex(parser);
18222
18223 pm_token_t when_keyword = parser->previous;
18224 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18225
18226 do {
18227 if (accept1(parser, PM_TOKEN_USTAR)) {
18228 pm_token_t operator = parser->previous;
18229 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18230
18231 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18232 pm_when_node_conditions_append(when_node, UP(splat_node));
18233
18234 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18235 } else {
18236 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18237 pm_when_node_conditions_append(when_node, condition);
18238
18239 // If we found a missing node, then this is a syntax
18240 // error and we should stop looping.
18241 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18242
18243 // If this is a string node, then we need to mark it
18244 // as frozen because when clause strings are frozen.
18245 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18246 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18247 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18248 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18249 }
18250
18251 pm_when_clause_static_literals_add(parser, &literals, condition);
18252 }
18253 } while (accept1(parser, PM_TOKEN_COMMA));
18254
18255 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18256 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18257 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18258 }
18259 } else {
18260 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18261 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18262 }
18263
18264 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18265 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18266 if (statements != NULL) {
18267 pm_when_node_statements_set(when_node, statements);
18268 }
18269 }
18270
18271 pm_case_node_condition_append(case_node, UP(when_node));
18272 }
18273
18274 // If we didn't parse any conditions (in or when) then we need
18275 // to indicate that we have an error.
18276 if (case_node->conditions.size == 0) {
18277 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18278 }
18279
18280 pm_static_literals_free(&literals);
18281 node = UP(case_node);
18282 } else {
18283 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18284
18285 // If this is a case-match node (i.e., it is a pattern matching
18286 // case statement) then we must have a predicate.
18287 if (predicate == NULL) {
18288 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18289 }
18290
18291 // At this point we expect that we're parsing a case-in node. We
18292 // will continue to parse the in nodes until we hit the end of
18293 // the list.
18294 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18295 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18296
18297 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18298 parser->pattern_matching_newlines = true;
18299
18300 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18301 parser->command_start = false;
18302 parser_lex(parser);
18303
18304 pm_token_t in_keyword = parser->previous;
18305
18306 pm_constant_id_list_t captures = { 0 };
18307 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18308
18309 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18310 pm_constant_id_list_free(&captures);
18311
18312 // Since we're in the top-level of the case-in node we need
18313 // to check for guard clauses in the form of `if` or
18314 // `unless` statements.
18315 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18316 pm_token_t keyword = parser->previous;
18317 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18318 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18319 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18320 pm_token_t keyword = parser->previous;
18321 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18322 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18323 }
18324
18325 // Now we need to check for the terminator of the in node's
18326 // pattern. It can be a newline or semicolon optionally
18327 // followed by a `then` keyword.
18328 pm_token_t then_keyword;
18329 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18330 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18331 then_keyword = parser->previous;
18332 } else {
18333 then_keyword = not_provided(parser);
18334 }
18335 } else {
18336 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18337 then_keyword = parser->previous;
18338 }
18339
18340 // Now we can actually parse the statements associated with
18341 // the in node.
18342 pm_statements_node_t *statements;
18343 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18344 statements = NULL;
18345 } else {
18346 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18347 }
18348
18349 // Now that we have the full pattern and statements, we can
18350 // create the node and attach it to the case node.
18351 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
18352 pm_case_match_node_condition_append(case_node, condition);
18353 }
18354
18355 // If we didn't parse any conditions (in or when) then we need
18356 // to indicate that we have an error.
18357 if (case_node->conditions.size == 0) {
18358 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18359 }
18360
18361 node = UP(case_node);
18362 }
18363
18364 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18365 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18366 pm_token_t else_keyword = parser->previous;
18367 pm_else_node_t *else_node;
18368
18369 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18370 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18371 } else {
18372 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18373 }
18374
18375 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18376 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18377 } else {
18378 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18379 }
18380 }
18381
18382 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18383 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18384
18385 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18386 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18387 } else {
18388 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18389 }
18390
18391 pop_block_exits(parser, previous_block_exits);
18392 pm_node_list_free(&current_block_exits);
18393
18394 return node;
18395 }
18396 case PM_TOKEN_KEYWORD_BEGIN: {
      // Parses an explicit `begin ... end` expression and returns it as a
      // begin node. The newline index of the opening keyword is captured
      // before lexing past it and is handed to parse_rescues below.
18397 size_t opening_newline_index = token_newline_index(parser);
18398 parser_lex(parser);
18399
18400 pm_token_t begin_keyword = parser->previous;
18401 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18402
      // A fresh block-exit list is installed for the duration of this arm;
      // the previous list is handed back through pop_block_exits before
      // returning.
18403 pm_node_list_t current_block_exits = { 0 };
18404 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18405 pm_statements_node_t *begin_statements = NULL;
18406
      // Only parse a body when the next token is not one of
      // rescue/ensure/else/end; otherwise begin_statements stays NULL.
18407 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18408 pm_accepts_block_stack_push(parser, true);
18409 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18410 pm_accepts_block_stack_pop(parser);
18411 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18412 }
18413
      // The node is created from the keyword and body first, then passed to
      // parse_rescues (with PM_RESCUES_BEGIN) before the closing `end` is
      // required.
18414 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18415 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18416 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
18417
      // Stretch the node's location to the end of the last consumed token
      // and record that token as the end keyword.
18418 begin_node->base.location.end = parser->previous.end;
18419 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18420
18421 pop_block_exits(parser, previous_block_exits);
18422 pm_node_list_free(&current_block_exits);
18423
18424 return UP(begin_node);
18425 }
18426 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
      // Parses an upper-case `BEGIN { ... }` block and returns a
      // pre-execution node built from the keyword, both braces and the
      // statements between them.
18427 pm_node_list_t current_block_exits = { 0 };
18428 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18429
      // Reported against the current token (still the BEGIN keyword, since
      // nothing has been lexed yet) when binding_power is anything other
      // than PM_BINDING_POWER_STATEMENT.
18430 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18431 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18432 }
18433
18434 parser_lex(parser);
18435 pm_token_t keyword = parser->previous;
18436
18437 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18438 pm_token_t opening = parser->previous;
18439 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18440
18441 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
      // The enclosing context must be PM_CONTEXT_MAIN or PM_CONTEXT_PREEXE;
      // any other context is reported as PM_ERR_BEGIN_UPCASE_TOPLEVEL on
      // the keyword token.
18442 pm_context_t context = parser->current_context->context;
18443 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18444 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18445 }
18446
      // Note: this arm uses flush_block_exits where the `begin` arm uses
      // pop_block_exits.
18447 flush_block_exits(parser, previous_block_exits);
18448 pm_node_list_free(&current_block_exits);
18449
      // No token is lexed between the expect1 for the right brace and this
      // point, so parser->previous is passed as the closing token.
18450 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18451 }
18452 case PM_TOKEN_KEYWORD_BREAK:
18453 case PM_TOKEN_KEYWORD_NEXT:
18454 case PM_TOKEN_KEYWORD_RETURN: {
      // Shared handling for `break`, `next` and `return`: lex the keyword,
      // optionally parse its arguments, then build the node matching the
      // keyword type.
18455 parser_lex(parser);
18456
18457 pm_token_t keyword = parser->previous;
18458 pm_arguments_t arguments = { 0 };
18459
      // Arguments are only attempted when the next token can begin an
      // expression or is a unary `*` / `**` token.
18460 if (
18461 token_begins_expression_p(parser->current.type) ||
18462 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18463 ) {
      // NOTE: this local shadows the outer `binding_power` that other arms
      // of this switch read; here it is the left binding power of the
      // upcoming token.
18464 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18465
      // Tokens whose left binding power is set and below
      // PM_BINDING_POWER_RANGE are not treated as the start of arguments.
18466 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18467 pm_token_t next = parser->current;
18468 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18469
18470 // Reject `foo && return bar`.
18471 if (!accepts_command_call && arguments.arguments != NULL) {
18472 PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18473 }
18474 }
18475 }
18476
      // `break` and `next` are passed to parse_block_exit unless the parser
      // is in partial-script mode; `return` is always passed to
      // parse_return.
18477 switch (keyword.type) {
18478 case PM_TOKEN_KEYWORD_BREAK: {
18479 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18480 if (!parser->partial_script) parse_block_exit(parser, node);
18481 return node;
18482 }
18483 case PM_TOKEN_KEYWORD_NEXT: {
18484 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18485 if (!parser->partial_script) parse_block_exit(parser, node);
18486 return node;
18487 }
18488 case PM_TOKEN_KEYWORD_RETURN: {
18489 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18490 parse_return(parser, node);
18491 return node;
18492 }
18493 default:
      // keyword.type can only be one of the three labels above; when
      // asserts are compiled out a missing node is returned instead.
18494 assert(false && "unreachable");
18495 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
18496 }
18497 }
18498 case PM_TOKEN_KEYWORD_SUPER: {
      // Parses `super` and returns either a forwarding-super node or a
      // regular super node, depending on what followed the keyword.
18499 parser_lex(parser);
18500
18501 pm_token_t keyword = parser->previous;
18502 pm_arguments_t arguments = { 0 };
18503 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18504
      // A forwarding super is produced only when there was no opening
      // parenthesis, no argument list, and the block slot is either empty
      // or holds a PM_BLOCK_NODE. Every other shape yields a regular super
      // node.
18505 if (
18506 arguments.opening_loc.start == NULL &&
18507 arguments.arguments == NULL &&
18508 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18509 ) {
18510 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18511 }
18512
18513 return UP(pm_super_node_create(parser, &keyword, &arguments));
18514 }
18515 case PM_TOKEN_KEYWORD_YIELD: {
      // Parses `yield` with its optional argument list and returns a yield
      // node. Unlike the `super` arm, parse_arguments_list is called with
      // `false` as its third argument.
18516 parser_lex(parser);
18517
18518 pm_token_t keyword = parser->previous;
18519 pm_arguments_t arguments = { 0 };
18520 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18521
18522 // It's possible that we've parsed a block argument through our
18523 // call to parse_arguments_list. If we found one, we should mark it
18524 // as invalid and destroy it, as we don't have a place for it on the
18525 // yield node.
18526 if (arguments.block != NULL) {
18527 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18528 pm_node_unreference(parser, arguments.block);
18529 pm_node_destroy(parser, arguments.block);
18530 arguments.block = NULL;
18531 }
18532
      // parse_yield is skipped when parsing an eval or a partial script.
18533 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18534 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18535
18536 return node;
18537 }
18538 case PM_TOKEN_KEYWORD_CLASS: {
      // Parses either a singleton class (`class << expr ... end`) or a
      // regular class definition (`class Name [< Superclass] ... end`).
      // Both paths push a do-loop-stack entry, a block-exit list and a new
      // parser scope, and undo all three before returning.
18539 size_t opening_newline_index = token_newline_index(parser);
18540 parser_lex(parser);
18541
18542 pm_token_t class_keyword = parser->previous;
18543 pm_do_loop_stack_push(parser, false);
18544
18545 pm_node_list_t current_block_exits = { 0 };
18546 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18547
      // Singleton class form: `class << expression`.
18548 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18549 pm_token_t operator = parser->previous;
18550 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18551
      // The scope is pushed after the target expression has been parsed.
      // The newline/semicolon delimiter is only tested with match2 here,
      // not accepted or expected.
18552 pm_parser_scope_push(parser, true);
18553 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18554 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18555 }
18556
18557 pm_node_t *statements = NULL;
18558 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18559 pm_accepts_block_stack_push(parser, true);
18560 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18561 pm_accepts_block_stack_pop(parser);
18562 }
18563
      // With a rescue/ensure clause the body is replaced by the result of
      // parse_rescues_implicit_begin; otherwise only the indentation check
      // runs.
18564 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18565 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18566 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18567 } else {
18568 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18569 }
18570
18571 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
18572
      // The locals are read from the current scope before that scope is
      // popped.
18573 pm_constant_id_list_t locals;
18574 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18575
18576 pm_parser_scope_pop(parser);
18577 pm_do_loop_stack_pop(parser);
18578
      // Note: the singleton path uses flush_block_exits, while the regular
      // class path below uses pop_block_exits.
18579 flush_block_exits(parser, previous_block_exits);
18580 pm_node_list_free(&current_block_exits);
18581
18582 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18583 }
18584
      // Regular class form. The name token is whatever token was consumed
      // last while parsing the constant path; it must be a constant.
18585 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18586 pm_token_t name = parser->previous;
18587 if (name.type != PM_TOKEN_CONSTANT) {
18588 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18589 }
18590
18591 pm_token_t inheritance_operator;
18592 pm_node_t *superclass;
18593
      // Optional `< Superclass`. The lex state and command_start flag are
      // set before lexing past the `<` token.
18594 if (match1(parser, PM_TOKEN_LESS)) {
18595 inheritance_operator = parser->current;
18596 lex_state_set(parser, PM_LEX_STATE_BEG);
18597
18598 parser->command_start = true;
18599 parser_lex(parser);
18600
18601 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18602 } else {
18603 inheritance_operator = not_provided(parser);
18604 superclass = NULL;
18605 }
18606
18607 pm_parser_scope_push(parser, true);
18608
      // A terminator is required after a superclass expression (expect2)
      // but is optional when there is none (accept2).
18609 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
18610 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18611 } else {
18612 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18613 }
18614 pm_node_t *statements = NULL;
18615
18616 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18617 pm_accepts_block_stack_push(parser, true);
18618 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18619 pm_accepts_block_stack_pop(parser);
18620 }
18621
18622 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18623 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18624 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18625 } else {
18626 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18627 }
18628
18629 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
18630
      // A class definition while context_def_p reports a def context is an
      // error, reported on the `class` keyword.
18631 if (context_def_p(parser)) {
18632 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18633 }
18634
18635 pm_constant_id_list_t locals;
18636 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18637
18638 pm_parser_scope_pop(parser);
18639 pm_do_loop_stack_pop(parser);
18640
      // The parsed name expression must be a constant path or a constant
      // read; anything else is reported on the node itself.
18641 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18642 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18643 }
18644
18645 pop_block_exits(parser, previous_block_exits);
18646 pm_node_list_free(&current_block_exits);
18647
18648 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
18649 }
18650 case PM_TOKEN_KEYWORD_DEF: {
      // Parses a method definition. The arm proceeds in four steps:
      //   1. the method name, with an optional receiver and `.`/`::`
      //      operator;
      //   2. the parameter list, parenthesized or bare;
      //   3. the body, either a single expression after `=` or statements
      //      terminated by `end`;
      //   4. construction of the def node.
      // A PM_CONTEXT_DEF_PARAMS context is pushed before the name is lexed
      // and popped in every branch of the parameter switch. Every branch of
      // the name switch pushes exactly one parser scope, which is popped
      // after the locals have been ordered.
18651 pm_node_list_t current_block_exits = { 0 };
18652 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18653
      // The keyword has not been lexed past yet, so it is still the current
      // token here.
18654 pm_token_t def_keyword = parser->current;
18655 size_t opening_newline_index = token_newline_index(parser);
18656
18657 pm_node_t *receiver = NULL;
18658 pm_token_t operator = not_provided(parser);
18659 pm_token_t name;
18660
18661 // This context is necessary to lex `...` correctly in a bare
18662 // parameter list. It must be pushed before the first parameter is
18663 // lexed, which is why it happens here.
18664 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18665 parser_lex(parser);
18666
18667 // This will be false if the method name is not a valid identifier
18668 // but could be followed by an operator.
18669 bool valid_name = true;
18670
      // Step 1: method name and optional receiver.
18671 switch (parser->current.type) {
18672 case PM_CASE_OPERATOR:
18673 pm_parser_scope_push(parser, true);
18674 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18675 parser_lex(parser);
18676
18677 name = parser->previous;
18678 break;
18679 case PM_TOKEN_IDENTIFIER: {
18680 parser_lex(parser);
18681
      // An identifier followed by `.` or `::` is a receiver; otherwise it
      // is the method name itself.
18682 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18683 receiver = parse_variable_call(parser);
18684
18685 pm_parser_scope_push(parser, true);
18686 lex_state_set(parser, PM_LEX_STATE_FNAME);
18687 parser_lex(parser);
18688
18689 operator = parser->previous;
18690 name = parse_method_definition_name(parser);
18691 } else {
18692 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
18693 pm_parser_scope_push(parser, true);
18694
18695 name = parser->previous;
18696 }
18697
18698 break;
18699 }
18700 case PM_TOKEN_INSTANCE_VARIABLE:
18701 case PM_TOKEN_CLASS_VARIABLE:
18702 case PM_TOKEN_GLOBAL_VARIABLE:
18703 valid_name = false;
      // There is no `break` above: the variable-token labels continue into
      // the shared handling below with valid_name cleared.
      // NOTE(review): the embedded listing numbers skip 18704 here, so a
      // fallthrough marker may have been dropped from this listing — confirm
      // against upstream.
18705 case PM_TOKEN_CONSTANT:
18706 case PM_TOKEN_KEYWORD_NIL:
18707 case PM_TOKEN_KEYWORD_SELF:
18708 case PM_TOKEN_KEYWORD_TRUE:
18709 case PM_TOKEN_KEYWORD_FALSE:
18710 case PM_TOKEN_KEYWORD___FILE__:
18711 case PM_TOKEN_KEYWORD___LINE__:
18712 case PM_TOKEN_KEYWORD___ENCODING__: {
18713 pm_parser_scope_push(parser, true);
18714 parser_lex(parser);
18715
18716 pm_token_t identifier = parser->previous;
18717
      // Followed by `.` or `::`, the token becomes a receiver node of the
      // matching kind. Otherwise it is used as the method name, which is an
      // error for the variable tokens (valid_name == false).
18718 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18719 lex_state_set(parser, PM_LEX_STATE_FNAME);
18720 parser_lex(parser);
18721 operator = parser->previous;
18722
18723 switch (identifier.type) {
18724 case PM_TOKEN_CONSTANT:
18725 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18726 break;
18727 case PM_TOKEN_INSTANCE_VARIABLE:
18728 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18729 break;
18730 case PM_TOKEN_CLASS_VARIABLE:
18731 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18732 break;
18733 case PM_TOKEN_GLOBAL_VARIABLE:
18734 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18735 break;
18736 case PM_TOKEN_KEYWORD_NIL:
18737 receiver = UP(pm_nil_node_create(parser, &identifier));
18738 break;
18739 case PM_TOKEN_KEYWORD_SELF:
18740 receiver = UP(pm_self_node_create(parser, &identifier));
18741 break;
18742 case PM_TOKEN_KEYWORD_TRUE:
18743 receiver = UP(pm_true_node_create(parser, &identifier));
18744 break;
18745 case PM_TOKEN_KEYWORD_FALSE:
18746 receiver = UP(pm_false_node_create(parser, &identifier));
18747 break;
18748 case PM_TOKEN_KEYWORD___FILE__:
18749 receiver = UP(pm_source_file_node_create(parser, &identifier));
18750 break;
18751 case PM_TOKEN_KEYWORD___LINE__:
18752 receiver = UP(pm_source_line_node_create(parser, &identifier));
18753 break;
18754 case PM_TOKEN_KEYWORD___ENCODING__:
18755 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18756 break;
18757 default:
      // Any other token type leaves receiver as NULL.
18758 break;
18759 }
18760
18761 name = parse_method_definition_name(parser);
18762 } else {
18763 if (!valid_name) {
18764 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18765 }
18766
18767 name = identifier;
18768 }
18769 break;
18770 }
18771 case PM_TOKEN_PARENTHESIS_LEFT: {
18772 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18773 // the inner expression of this parenthesis should not be
18774 // processed under this context. Thus, the context is popped
18775 // here.
18776 context_pop(parser);
18777 parser_lex(parser);
18778
18779 pm_token_t lparen = parser->previous;
18780 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18781
18782 accept1(parser, PM_TOKEN_NEWLINE);
18783 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18784 pm_token_t rparen = parser->previous;
18785
      // A parenthesized receiver must be followed by `.` or `::`.
18786 lex_state_set(parser, PM_LEX_STATE_FNAME);
18787 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18788
18789 operator = parser->previous;
18790 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18791
18792 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
18793 // reason as described above.
18794 pm_parser_scope_push(parser, true);
18795 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18796 name = parse_method_definition_name(parser);
18797 break;
18798 }
18799 default:
18800 pm_parser_scope_push(parser, true);
18801 name = parse_method_definition_name(parser);
18802 break;
18803 }
18804
      // Step 2: parameters. lparen and rparen are set to not-provided
      // tokens when the list is bare or absent.
18805 pm_token_t lparen;
18806 pm_token_t rparen;
18807 pm_parameters_node_t *params;
18808
18809 bool accept_endless_def = true;
18810 switch (parser->current.type) {
18811 case PM_TOKEN_PARENTHESIS_LEFT: {
18812 parser_lex(parser);
18813 lparen = parser->previous;
18814
18815 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18816 params = NULL;
18817 } else {
18818 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
18819 }
18820
18821 lex_state_set(parser, PM_LEX_STATE_BEG);
18822 parser->command_start = true;
18823
18824 context_pop(parser);
      // On a missing `)`, parser->previous is rewritten in place into a
      // zero-width PM_TOKEN_MISSING token so that rparen below records a
      // missing token.
18825 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18826 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18827 parser->previous.start = parser->previous.end;
18828 parser->previous.type = PM_TOKEN_MISSING;
18829 }
18830
18831 rparen = parser->previous;
18832 break;
18833 }
18834 case PM_CASE_PARAMETER: {
18835 // If we're about to lex a label, we need to add the label
18836 // state to make sure the next newline is ignored.
18837 if (parser->current.type == PM_TOKEN_LABEL) {
18838 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18839 }
18840
18841 lparen = not_provided(parser);
18842 rparen = not_provided(parser);
18843 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
18844
18845 // Reject `def * = 1` and similar. We have to specifically check
18846 // for them because they create ambiguity with optional arguments.
18847 accept_endless_def = false;
18848
18849 context_pop(parser);
18850 break;
18851 }
18852 default: {
18853 lparen = not_provided(parser);
18854 rparen = not_provided(parser);
18855 params = NULL;
18856
18857 context_pop(parser);
18858 break;
18859 }
18860 }
18861
      // Step 3: body. An `=` here starts an endless definition; otherwise a
      // regular body terminated by `end` is parsed.
18862 pm_node_t *statements = NULL;
18863 pm_token_t equal;
18864 pm_token_t end_keyword;
18865
18866 if (accept1(parser, PM_TOKEN_EQUAL)) {
      // Endless definitions reject setter names and bare parameter lists.
18867 if (token_is_setter_name(&name)) {
18868 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18869 }
18870 if (!accept_endless_def) {
18871 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18872 }
      // NOTE(review): the condition of the following `if` is absent from
      // this listing (the embedded numbering skips 18874-18875). Restore it
      // from upstream before relying on this text.
18873 if (
18876 ) {
18877 PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18878 }
18879 equal = parser->previous;
18880
18881 context_push(parser, PM_CONTEXT_DEF);
18882 pm_do_loop_stack_push(parser, false);
18883 statements = UP(pm_statements_node_create(parser));
18884
      // From PM_OPTIONS_VERSION_CRUBY_4_0 onwards the caller's
      // accepts_command_call is used as is; older versions derive the flag
      // from the binding power.
18885 bool allow_command_call;
18886 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18887 allow_command_call = accepts_command_call;
18888 } else {
18889 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18890 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18891 }
18892
18893 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18894
      // A trailing `rescue` modifier wraps the single body expression in a
      // rescue-modifier node.
18895 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18896 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18897
18898 pm_token_t rescue_keyword = parser->previous;
18899 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18900 context_pop(parser);
18901
18902 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18903 }
18904
18905 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18906 pm_do_loop_stack_pop(parser);
18907 context_pop(parser);
18908 end_keyword = not_provided(parser);
18909 } else {
18910 equal = not_provided(parser);
18911
      // Without parentheses a newline or semicolon must terminate the
      // parameter list; with parentheses it is optional.
18912 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
18913 lex_state_set(parser, PM_LEX_STATE_BEG);
18914 parser->command_start = true;
18915 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18916 } else {
18917 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18918 }
18919
      // The accepts-block stack is pushed here and again around
      // parse_statements below; each push has a matching pop.
18920 pm_accepts_block_stack_push(parser, true);
18921 pm_do_loop_stack_push(parser, false);
18922
18923 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18924 pm_accepts_block_stack_push(parser, true);
18925 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18926 pm_accepts_block_stack_pop(parser);
18927 }
18928
      // Unlike the class arms, `else` is also checked here (match3) before
      // handing over to parse_rescues_implicit_begin.
18929 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18930 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18931 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18932 } else {
18933 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18934 }
18935
18936 pm_accepts_block_stack_pop(parser);
18937 pm_do_loop_stack_pop(parser);
18938
18939 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
18940 end_keyword = parser->previous;
18941 }
18942
      // Step 4: collect the locals before the scope is popped, then build
      // the node.
18943 pm_constant_id_list_t locals;
18944 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18945 pm_parser_scope_pop(parser);
18946
      // The constant id is taken starting at name.start with the length
      // returned by parse_operator_symbol_name for the name token.
18952 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
18953
18954 flush_block_exits(parser, previous_block_exits);
18955 pm_node_list_free(&current_block_exits);
18956
18957 return UP(pm_def_node_create(
18958 parser,
18959 name_id,
18960 &name,
18961 receiver,
18962 params,
18963 statements,
18964 &locals,
18965 &def_keyword,
18966 &operator,
18967 &lparen,
18968 &rparen,
18969 &equal,
18970 &end_keyword
18971 ));
18972 }
18973 case PM_TOKEN_KEYWORD_DEFINED: {
      // Parses the `defined` keyword expression, with or without
      // parentheses around its operand, and returns a defined node. The
      // operand is parsed inside a PM_CONTEXT_DEFINED context.
18974 parser_lex(parser);
18975 pm_token_t keyword = parser->previous;
18976
18977 pm_token_t lparen;
18978 pm_token_t rparen;
18979 pm_node_t *expression;
18980
18981 context_push(parser, PM_CONTEXT_DEFINED);
      // Remember whether a newline separated the keyword from what follows;
      // this only affects the empty-parentheses case below.
18982 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
18983
18984 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
18985 lparen = parser->previous;
18986
      // After a newline, an empty `()` is itself the operand: it becomes a
      // parentheses node and the keyword's own parentheses are recorded as
      // not provided.
18987 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18988 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
18989 lparen = not_provided(parser);
18990 rparen = not_provided(parser);
18991 } else {
18992 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
18993
      // While the parser is recovering, the closing parenthesis is not
      // looked for and is recorded as not provided.
18994 if (parser->recovering) {
18995 rparen = not_provided(parser);
18996 } else {
18997 accept1(parser, PM_TOKEN_NEWLINE);
18998 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18999 rparen = parser->previous;
19000 }
19001 }
19002 } else {
      // No parentheses: the operand is parsed at PM_BINDING_POWER_DEFINED
      // with command calls disallowed.
19003 lparen = not_provided(parser);
19004 rparen = not_provided(parser);
19005 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19006 }
19007
19008 context_pop(parser);
19009 return UP(pm_defined_node_create(
19010 parser,
19011 &lparen,
19012 expression,
19013 &rparen,
19014 &keyword
19015 ));
19016 }
19017 case PM_TOKEN_KEYWORD_END_UPCASE: {
      // Parses an upper-case `END { ... }` block and returns a
      // post-execution node. Unlike the `BEGIN { ... }` arm, no block-exit
      // list is pushed here.
      // As in the BEGIN arm, any binding power other than
      // PM_BINDING_POWER_STATEMENT is reported on the current token.
19018 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19019 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19020 }
19021
19022 parser_lex(parser);
19023 pm_token_t keyword = parser->previous;
19024
      // Inside a method definition this is only a warning, not an error.
19025 if (context_def_p(parser)) {
19026 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19027 }
19028
19029 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19030 pm_token_t opening = parser->previous;
19031 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19032
19033 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19034 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19035 }
19036 case PM_TOKEN_KEYWORD_FALSE:
      // `false` literal: lex past the keyword and build a false node from
      // the token just consumed.
19037 parser_lex(parser);
19038 return UP(pm_false_node_create(parser, &parser->previous));
19039 case PM_TOKEN_KEYWORD_FOR: {
      // Parses `for <index> in <collection> [do] ... end` and returns a for
      // node. The index is parsed inside a PM_CONTEXT_FOR_INDEX context,
      // and the `in` keyword and collection are parsed with a `true` entry
      // on the do-loop stack.
19040 size_t opening_newline_index = token_newline_index(parser);
19041 parser_lex(parser);
19042
19043 pm_token_t for_keyword = parser->previous;
19044 pm_node_t *index;
19045
19046 context_push(parser, PM_CONTEXT_FOR_INDEX);
19047
19048 // First, parse out the first index expression.
19049 if (accept1(parser, PM_TOKEN_USTAR)) {
19050 pm_token_t star_operator = parser->previous;
19051 pm_node_t *name = NULL;
19052
      // The splat may be anonymous: a name is parsed only if an expression
      // follows the `*`.
19053 if (token_begins_expression_p(parser->current.type)) {
19054 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19055 }
19056
19057 index = UP(pm_splat_node_create(parser, &star_operator, name));
19058 } else if (token_begins_expression_p(parser->current.type)) {
19059 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19060 } else {
      // No index at all: report on the `for` keyword and substitute a
      // missing node spanning that keyword.
19061 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19062 index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
19063 }
19064
19065 // Now, if there are multiple index expressions, parse them out.
19066 if (match1(parser, PM_TOKEN_COMMA)) {
19067 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19068 } else {
19069 index = parse_target(parser, index, false, false);
19070 }
19071
19072 context_pop(parser);
19073 pm_do_loop_stack_push(parser, true);
19074
19075 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19076 pm_token_t in_keyword = parser->previous;
19077
19078 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19079 pm_do_loop_stack_pop(parser);
19080
      // Either a loop `do` keyword follows the collection, or a
      // semicolon/newline must be next. The delimiter is only tested with
      // match2 here, not consumed.
19081 pm_token_t do_keyword;
19082 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19083 do_keyword = parser->previous;
19084 } else {
19085 do_keyword = not_provided(parser);
19086 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19087 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19088 }
19089 }
19090
      // The body stays NULL when `end` follows immediately.
19091 pm_statements_node_t *statements = NULL;
19092 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19093 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19094 }
19095
19096 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19097 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19098
19099 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
19100 }
19101 case PM_TOKEN_KEYWORD_IF:
      // Parses an `if` expression by delegating to parse_conditional with
      // PM_CONTEXT_IF. A warning is emitted first when
      // parser_end_of_line_p reports true for the keyword.
      // NOTE(review): unlike the neighbouring arms, this arm has no braces,
      // so the two declarations below live in the enclosing switch scope —
      // consider wrapping the arm in a block for consistency.
19102 if (parser_end_of_line_p(parser)) {
19103 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19104 }
19105
      // Both values must be captured before parser_lex advances:
      // if_after_else inspects the token that preceded the `if` keyword.
19106 size_t opening_newline_index = token_newline_index(parser);
19107 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19108 parser_lex(parser);
19109
19110 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19111 case PM_TOKEN_KEYWORD_UNDEF: {
      // Parses `undef name[, name ...]` and returns an undef node holding
      // every successfully parsed name. An error is reported on the current
      // token when binding_power is not PM_BINDING_POWER_STATEMENT.
19112 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19113 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19114 }
19115
19116 parser_lex(parser);
19117 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19118 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19119
      // A missing node is destroyed instead of being appended. If the first
      // name is missing, the node is returned with no names at all.
19120 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19121 pm_node_destroy(parser, name);
19122 } else {
19123 pm_undef_node_append(undef, name);
19124
      // For each following comma, the lex state is set before lexing past
      // the comma so that the next token is lexed in the
      // PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM state. The loop stops at
      // the first missing name.
19125 while (match1(parser, PM_TOKEN_COMMA)) {
19126 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19127 parser_lex(parser);
19128 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19129
19130 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19131 pm_node_destroy(parser, name);
19132 break;
19133 }
19134
19135 pm_undef_node_append(undef, name);
19136 }
19137 }
19138
19139 return UP(undef);
19140 }
19141 case PM_TOKEN_KEYWORD_NOT: {
      // Parses the `not` keyword and returns a call node built by
      // pm_call_node_not_create from the operand, with the keyword token as
      // the message. On the error path below a missing node is returned
      // instead.
19142 parser_lex(parser);
19143
19144 pm_token_t message = parser->previous;
19145 pm_arguments_t arguments = { 0 };
19146 pm_node_t *receiver = NULL;
19147
19148 // If we do not accept a command call, then we also do not accept a
19149 // `not` without parentheses. In this case we need to reject this
19150 // syntax.
19151 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
      // Two different diagnostics: one when the next token is
      // PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES (reported on the single byte
      // following the `not` keyword), and one for any other token.
19152 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19153 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19154 } else {
19155 accept1(parser, PM_TOKEN_NEWLINE);
19156 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19157 }
19158
19159 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
19160 }
19161
19162 accept1(parser, PM_TOKEN_NEWLINE);
19163
19164 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19165 pm_token_t lparen = parser->previous;
19166
      // An empty `()` becomes a parentheses node used as the receiver.
      // Otherwise the parentheses are recorded as the call's
      // opening/closing locations.
19167 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19168 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19169 } else {
19170 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19171 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19172
      // While the parser is recovering, the closing parenthesis is not
      // looked for and closing_loc keeps its zero initial value.
19173 if (!parser->recovering) {
19174 accept1(parser, PM_TOKEN_NEWLINE);
19175 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19176 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19177 }
19178 }
19179 } else {
19180 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19181 }
19182
19183 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19184 }
19185 case PM_TOKEN_KEYWORD_UNLESS: {
      // Parses an `unless` expression by delegating to parse_conditional
      // with PM_CONTEXT_UNLESS. The if_after_else argument is always false
      // here, and the newline index is captured before lexing past the
      // keyword.
19186 size_t opening_newline_index = token_newline_index(parser);
19187 parser_lex(parser);
19188
19189 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19190 }
19191 case PM_TOKEN_KEYWORD_MODULE: {
19192 pm_node_list_t current_block_exits = { 0 };
19193 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19194
19195 size_t opening_newline_index = token_newline_index(parser);
19196 parser_lex(parser);
19197 pm_token_t module_keyword = parser->previous;
19198
19199 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19200 pm_token_t name;
19201
19202 // If we can recover from a syntax error that occurred while parsing
19203 // the name of the module, then we'll handle that here.
19204 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19205 pop_block_exits(parser, previous_block_exits);
19206 pm_node_list_free(&current_block_exits);
19207
19208 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19209 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19210 }
19211
19212 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19213 pm_token_t double_colon = parser->previous;
19214
19215 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19216 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19217 }
19218
19219 // Here we retrieve the name of the module. If it wasn't a constant,
19220 // then it's possible that `module foo` was passed, which is a
19221 // syntax error. We handle that here as well.
19222 name = parser->previous;
19223 if (name.type != PM_TOKEN_CONSTANT) {
19224 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19225 }
19226
19227 pm_parser_scope_push(parser, true);
19228 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19229 pm_node_t *statements = NULL;
19230
19231 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19232 pm_accepts_block_stack_push(parser, true);
19233 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19234 pm_accepts_block_stack_pop(parser);
19235 }
19236
19237 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19238 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19239 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19240 } else {
19241 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19242 }
19243
19244 pm_constant_id_list_t locals;
19245 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19246
19247 pm_parser_scope_pop(parser);
19248 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19249
19250 if (context_def_p(parser)) {
19251 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19252 }
19253
19254 pop_block_exits(parser, previous_block_exits);
19255 pm_node_list_free(&current_block_exits);
19256
19257 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19258 }
19259 case PM_TOKEN_KEYWORD_NIL:
19260 parser_lex(parser);
19261 return UP(pm_nil_node_create(parser, &parser->previous));
19262 case PM_TOKEN_KEYWORD_REDO: {
19263 parser_lex(parser);
19264
19265 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19266 if (!parser->partial_script) parse_block_exit(parser, node);
19267
19268 return node;
19269 }
19270 case PM_TOKEN_KEYWORD_RETRY: {
19271 parser_lex(parser);
19272
19273 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19274 parse_retry(parser, node);
19275
19276 return node;
19277 }
19278 case PM_TOKEN_KEYWORD_SELF:
19279 parser_lex(parser);
19280 return UP(pm_self_node_create(parser, &parser->previous));
19281 case PM_TOKEN_KEYWORD_TRUE:
19282 parser_lex(parser);
19283 return UP(pm_true_node_create(parser, &parser->previous));
19284 case PM_TOKEN_KEYWORD_UNTIL: {
19285 size_t opening_newline_index = token_newline_index(parser);
19286
19287 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19288 pm_do_loop_stack_push(parser, true);
19289
19290 parser_lex(parser);
19291 pm_token_t keyword = parser->previous;
19292 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19293
19294 pm_do_loop_stack_pop(parser);
19295 context_pop(parser);
19296
19297 pm_token_t do_keyword;
19298 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19299 do_keyword = parser->previous;
19300 } else {
19301 do_keyword = not_provided(parser);
19302 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19303 }
19304
19305 pm_statements_node_t *statements = NULL;
19306 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19307 pm_accepts_block_stack_push(parser, true);
19308 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19309 pm_accepts_block_stack_pop(parser);
19310 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19311 }
19312
19313 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19314 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19315
19316 return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19317 }
19318 case PM_TOKEN_KEYWORD_WHILE: {
19319 size_t opening_newline_index = token_newline_index(parser);
19320
19321 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19322 pm_do_loop_stack_push(parser, true);
19323
19324 parser_lex(parser);
19325 pm_token_t keyword = parser->previous;
19326 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19327
19328 pm_do_loop_stack_pop(parser);
19329 context_pop(parser);
19330
19331 pm_token_t do_keyword;
19332 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19333 do_keyword = parser->previous;
19334 } else {
19335 do_keyword = not_provided(parser);
19336 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19337 }
19338
19339 pm_statements_node_t *statements = NULL;
19340 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19341 pm_accepts_block_stack_push(parser, true);
19342 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19343 pm_accepts_block_stack_pop(parser);
19344 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19345 }
19346
19347 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19348 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19349
19350 return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
19351 }
19352 case PM_TOKEN_PERCENT_LOWER_I: {
19353 parser_lex(parser);
19354 pm_token_t opening = parser->previous;
19355 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19356 pm_node_t *current = NULL;
19357
19358 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19359 accept1(parser, PM_TOKEN_WORDS_SEP);
19360 if (match1(parser, PM_TOKEN_STRING_END)) break;
19361
19362 // Interpolation is not possible but nested heredocs can still lead to
19363 // consecutive (disjoint) string tokens when the final newline is escaped.
19364 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19365 pm_token_t opening = not_provided(parser);
19366 pm_token_t closing = not_provided(parser);
19367
19368 // Record the string node, moving to interpolation if needed.
19369 if (current == NULL) {
19370 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19371 parser_lex(parser);
19372 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19373 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19374 parser_lex(parser);
19375 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19376 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19377 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19378 pm_token_t bounds = not_provided(parser);
19379
19380 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19381 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19382 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19383 parser_lex(parser);
19384
19385 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19386 pm_interpolated_symbol_node_append(interpolated, first_string);
19387 pm_interpolated_symbol_node_append(interpolated, second_string);
19388
19389 xfree(current);
19390 current = UP(interpolated);
19391 } else {
19392 assert(false && "unreachable");
19393 }
19394 }
19395
19396 if (current) {
19397 pm_array_node_elements_append(array, current);
19398 current = NULL;
19399 } else {
19400 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19401 }
19402 }
19403
19404 pm_token_t closing = parser->current;
19405 if (match1(parser, PM_TOKEN_EOF)) {
19406 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19407 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19408 } else {
19409 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19410 }
19411 pm_array_node_close_set(array, &closing);
19412
19413 return UP(array);
19414 }
19415 case PM_TOKEN_PERCENT_UPPER_I: {
19416 parser_lex(parser);
19417 pm_token_t opening = parser->previous;
19418 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19419
19420 // This is the current node that we are parsing that will be added to the
19421 // list of elements.
19422 pm_node_t *current = NULL;
19423
19424 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19425 switch (parser->current.type) {
19426 case PM_TOKEN_WORDS_SEP: {
19427 if (current == NULL) {
19428 // If we hit a separator before we have any content, then we don't
19429 // need to do anything.
19430 } else {
19431 // If we hit a separator after we've hit content, then we need to
19432 // append that content to the list and reset the current node.
19433 pm_array_node_elements_append(array, current);
19434 current = NULL;
19435 }
19436
19437 parser_lex(parser);
19438 break;
19439 }
19440 case PM_TOKEN_STRING_CONTENT: {
19441 pm_token_t opening = not_provided(parser);
19442 pm_token_t closing = not_provided(parser);
19443
19444 if (current == NULL) {
19445 // If we hit content and the current node is NULL, then this is
19446 // the first string content we've seen. In that case we're going
19447 // to create a new string node and set that to the current.
19448 current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19449 parser_lex(parser);
19450 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19451 // If we hit string content and the current node is an
19452 // interpolated string, then we need to append the string content
19453 // to the list of child nodes.
19454 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19455 parser_lex(parser);
19456
19457 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19458 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19459 // If we hit string content and the current node is a symbol node,
19460 // then we need to convert the current node into an interpolated
19461 // string and add the string content to the list of child nodes.
19462 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19463 pm_token_t bounds = not_provided(parser);
19464
19465 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19466 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
19467 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
19468 parser_lex(parser);
19469
19470 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19471 pm_interpolated_symbol_node_append(interpolated, first_string);
19472 pm_interpolated_symbol_node_append(interpolated, second_string);
19473
19474 xfree(current);
19475 current = UP(interpolated);
19476 } else {
19477 assert(false && "unreachable");
19478 }
19479
19480 break;
19481 }
19482 case PM_TOKEN_EMBVAR: {
19483 bool start_location_set = false;
19484 if (current == NULL) {
19485 // If we hit an embedded variable and the current node is NULL,
19486 // then this is the start of a new string. We'll set the current
19487 // node to a new interpolated string.
19488 pm_token_t opening = not_provided(parser);
19489 pm_token_t closing = not_provided(parser);
19490 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19491 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19492 // If we hit an embedded variable and the current node is a string
19493 // node, then we'll convert the current into an interpolated
19494 // string and add the string node to the list of parts.
19495 pm_token_t opening = not_provided(parser);
19496 pm_token_t closing = not_provided(parser);
19497 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19498
19499 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19500 pm_interpolated_symbol_node_append(interpolated, current);
19501 interpolated->base.location.start = current->location.start;
19502 start_location_set = true;
19503 current = UP(interpolated);
19504 } else {
19505 // If we hit an embedded variable and the current node is an
19506 // interpolated string, then we'll just add the embedded variable.
19507 }
19508
19509 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19510 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19511 if (!start_location_set) {
19512 current->location.start = part->location.start;
19513 }
19514 break;
19515 }
19516 case PM_TOKEN_EMBEXPR_BEGIN: {
19517 bool start_location_set = false;
19518 if (current == NULL) {
19519 // If we hit an embedded expression and the current node is NULL,
19520 // then this is the start of a new string. We'll set the current
19521 // node to a new interpolated string.
19522 pm_token_t opening = not_provided(parser);
19523 pm_token_t closing = not_provided(parser);
19524 current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
19525 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19526 // If we hit an embedded expression and the current node is a
19527 // string node, then we'll convert the current into an
19528 // interpolated string and add the string node to the list of
19529 // parts.
19530 pm_token_t opening = not_provided(parser);
19531 pm_token_t closing = not_provided(parser);
19532 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19533
19534 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19535 pm_interpolated_symbol_node_append(interpolated, current);
19536 interpolated->base.location.start = current->location.start;
19537 start_location_set = true;
19538 current = UP(interpolated);
19539 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19540 // If we hit an embedded expression and the current node is an
19541 // interpolated string, then we'll just continue on.
19542 } else {
19543 assert(false && "unreachable");
19544 }
19545
19546 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19547 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19548 if (!start_location_set) {
19549 current->location.start = part->location.start;
19550 }
19551 break;
19552 }
19553 default:
19554 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19555 parser_lex(parser);
19556 break;
19557 }
19558 }
19559
19560 // If we have a current node, then we need to append it to the list.
19561 if (current) {
19562 pm_array_node_elements_append(array, current);
19563 }
19564
19565 pm_token_t closing = parser->current;
19566 if (match1(parser, PM_TOKEN_EOF)) {
19567 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19568 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19569 } else {
19570 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19571 }
19572 pm_array_node_close_set(array, &closing);
19573
19574 return UP(array);
19575 }
19576 case PM_TOKEN_PERCENT_LOWER_W: {
19577 parser_lex(parser);
19578 pm_token_t opening = parser->previous;
19579 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19580 pm_node_t *current = NULL;
19581
19582 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19583 accept1(parser, PM_TOKEN_WORDS_SEP);
19584 if (match1(parser, PM_TOKEN_STRING_END)) break;
19585
19586 // Interpolation is not possible but nested heredocs can still lead to
19587 // consecutive (disjoint) string tokens when the final newline is escaped.
19588 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19589 pm_token_t opening = not_provided(parser);
19590 pm_token_t closing = not_provided(parser);
19591
19592 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19593
19594 // Record the string node, moving to interpolation if needed.
19595 if (current == NULL) {
19596 current = string;
19597 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19598 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19599 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19600 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19601 pm_interpolated_string_node_append(interpolated, current);
19602 pm_interpolated_string_node_append(interpolated, string);
19603 current = UP(interpolated);
19604 } else {
19605 assert(false && "unreachable");
19606 }
19607 parser_lex(parser);
19608 }
19609
19610 if (current) {
19611 pm_array_node_elements_append(array, current);
19612 current = NULL;
19613 } else {
19614 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19615 }
19616 }
19617
19618 pm_token_t closing = parser->current;
19619 if (match1(parser, PM_TOKEN_EOF)) {
19620 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19621 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19622 } else {
19623 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19624 }
19625
19626 pm_array_node_close_set(array, &closing);
19627 return UP(array);
19628 }
19629 case PM_TOKEN_PERCENT_UPPER_W: {
19630 parser_lex(parser);
19631 pm_token_t opening = parser->previous;
19632 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19633
19634 // This is the current node that we are parsing that will be added
19635 // to the list of elements.
19636 pm_node_t *current = NULL;
19637
19638 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19639 switch (parser->current.type) {
19640 case PM_TOKEN_WORDS_SEP: {
19641 // Reset the explicit encoding if we hit a separator
19642 // since each element can have its own encoding.
19643 parser->explicit_encoding = NULL;
19644
19645 if (current == NULL) {
19646 // If we hit a separator before we have any content,
19647 // then we don't need to do anything.
19648 } else {
19649 // If we hit a separator after we've hit content,
19650 // then we need to append that content to the list
19651 // and reset the current node.
19652 pm_array_node_elements_append(array, current);
19653 current = NULL;
19654 }
19655
19656 parser_lex(parser);
19657 break;
19658 }
19659 case PM_TOKEN_STRING_CONTENT: {
19660 pm_token_t opening = not_provided(parser);
19661 pm_token_t closing = not_provided(parser);
19662
19663 pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
19664 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19665 parser_lex(parser);
19666
19667 if (current == NULL) {
19668 // If we hit content and the current node is NULL,
19669 // then this is the first string content we've seen.
19670 // In that case we're going to create a new string
19671 // node and set that to the current.
19672 current = string;
19673 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19674 // If we hit string content and the current node is
19675 // an interpolated string, then we need to append
19676 // the string content to the list of child nodes.
19677 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19678 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19679 // If we hit string content and the current node is
19680 // a string node, then we need to convert the
19681 // current node into an interpolated string and add
19682 // the string content to the list of child nodes.
19683 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19684 pm_interpolated_string_node_append(interpolated, current);
19685 pm_interpolated_string_node_append(interpolated, string);
19686 current = UP(interpolated);
19687 } else {
19688 assert(false && "unreachable");
19689 }
19690
19691 break;
19692 }
19693 case PM_TOKEN_EMBVAR: {
19694 if (current == NULL) {
19695 // If we hit an embedded variable and the current
19696 // node is NULL, then this is the start of a new
19697 // string. We'll set the current node to a new
19698 // interpolated string.
19699 pm_token_t opening = not_provided(parser);
19700 pm_token_t closing = not_provided(parser);
19701 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19702 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19703 // If we hit an embedded variable and the current
19704 // node is a string node, then we'll convert the
19705 // current into an interpolated string and add the
19706 // string node to the list of parts.
19707 pm_token_t opening = not_provided(parser);
19708 pm_token_t closing = not_provided(parser);
19709 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19710 pm_interpolated_string_node_append(interpolated, current);
19711 current = UP(interpolated);
19712 } else {
19713 // If we hit an embedded variable and the current
19714 // node is an interpolated string, then we'll just
19715 // add the embedded variable.
19716 }
19717
19718 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19719 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19720 break;
19721 }
19722 case PM_TOKEN_EMBEXPR_BEGIN: {
19723 if (current == NULL) {
19724 // If we hit an embedded expression and the current
19725 // node is NULL, then this is the start of a new
19726 // string. We'll set the current node to a new
19727 // interpolated string.
19728 pm_token_t opening = not_provided(parser);
19729 pm_token_t closing = not_provided(parser);
19730 current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
19731 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19732 // If we hit an embedded expression and the current
19733 // node is a string node, then we'll convert the
19734 // current into an interpolated string and add the
19735 // string node to the list of parts.
19736 pm_token_t opening = not_provided(parser);
19737 pm_token_t closing = not_provided(parser);
19738 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
19739 pm_interpolated_string_node_append(interpolated, current);
19740 current = UP(interpolated);
19741 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19742 // If we hit an embedded expression and the current
19743 // node is an interpolated string, then we'll just
19744 // continue on.
19745 } else {
19746 assert(false && "unreachable");
19747 }
19748
19749 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19750 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19751 break;
19752 }
19753 default:
19754 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19755 parser_lex(parser);
19756 break;
19757 }
19758 }
19759
19760 // If we have a current node, then we need to append it to the list.
19761 if (current) {
19762 pm_array_node_elements_append(array, current);
19763 }
19764
19765 pm_token_t closing = parser->current;
19766 if (match1(parser, PM_TOKEN_EOF)) {
19767 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19768 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19769 } else {
19770 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19771 }
19772
19773 pm_array_node_close_set(array, &closing);
19774 return UP(array);
19775 }
19776 case PM_TOKEN_REGEXP_BEGIN: {
19777 pm_token_t opening = parser->current;
19778 parser_lex(parser);
19779
19780 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19781 // If we get here, then we have an end immediately after a start. In
19782 // that case we'll create an empty content token and return an
19783 // uninterpolated regular expression.
19784 pm_token_t content = (pm_token_t) {
19785 .type = PM_TOKEN_STRING_CONTENT,
19786 .start = parser->previous.end,
19787 .end = parser->previous.end
19788 };
19789
19790 parser_lex(parser);
19791
19792 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19793 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19794
19795 return node;
19796 }
19797
19799
19800 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19801 // In this case we've hit string content so we know the regular
19802 // expression at least has something in it. We'll need to check if the
19803 // following token is the end (in which case we can return a plain
19804 // regular expression) or if it's not then it has interpolation.
19805 pm_string_t unescaped = parser->current_string;
19806 pm_token_t content = parser->current;
19807 bool ascii_only = parser->current_regular_expression_ascii_only;
19808 parser_lex(parser);
19809
19810 // If we hit an end, then we can create a regular expression
19811 // node without interpolation, which can be represented more
19812 // succinctly and more easily compiled.
19813 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19814 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19815
19816 // If we're not immediately followed by a =~, then we want
19817 // to parse all of the errors at this point. If it is
19818 // followed by a =~, then it will get parsed higher up while
19819 // parsing the named captures as well.
19820 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19821 parse_regular_expression_errors(parser, node);
19822 }
19823
19824 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19825 return UP(node);
19826 }
19827
19828 // If we get here, then we have interpolation so we'll need to create
19829 // a regular expression node with interpolation.
19830 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19831
19832 pm_token_t opening = not_provided(parser);
19833 pm_token_t closing = not_provided(parser);
19834 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19835
19836 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19837 // This is extremely strange, but the first string part of a
19838 // regular expression will always be tagged as binary if we
19839 // are in a US-ASCII file, no matter its contents.
19840 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19841 }
19842
19843 pm_interpolated_regular_expression_node_append(interpolated, part);
19844 } else {
19845 // If the first part of the body of the regular expression is not a
19846 // string content, then we have interpolation and we need to create an
19847 // interpolated regular expression node.
19848 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19849 }
19850
19851 // Now that we're here and we have interpolation, we'll parse all of the
19852 // parts into the list.
19853 pm_node_t *part;
19854 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19855 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19856 pm_interpolated_regular_expression_node_append(interpolated, part);
19857 }
19858 }
19859
19860 pm_token_t closing = parser->current;
19861 if (match1(parser, PM_TOKEN_EOF)) {
19862 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19863 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19864 } else {
19865 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19866 }
19867
19868 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19869 return UP(interpolated);
19870 }
19871 case PM_TOKEN_BACKTICK:
19872 case PM_TOKEN_PERCENT_LOWER_X: {
19873 parser_lex(parser);
19874 pm_token_t opening = parser->previous;
19875
19876 // When we get here, we don't know if this string is going to have
19877 // interpolation or not, even though it is allowed. Still, we want to be
19878 // able to return a string node without interpolation if we can since
19879 // it'll be faster.
19880 if (match1(parser, PM_TOKEN_STRING_END)) {
19881 // If we get here, then we have an end immediately after a start. In
19882 // that case we'll create an empty content token and return an
19883 // uninterpolated string.
19884 pm_token_t content = (pm_token_t) {
19885 .type = PM_TOKEN_STRING_CONTENT,
19886 .start = parser->previous.end,
19887 .end = parser->previous.end
19888 };
19889
19890 parser_lex(parser);
19891 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19892 }
19893
19895
19896 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19897 // In this case we've hit string content so we know the string
19898 // at least has something in it. We'll need to check if the
19899 // following token is the end (in which case we can return a
19900 // plain string) or if it's not then it has interpolation.
19901 pm_string_t unescaped = parser->current_string;
19902 pm_token_t content = parser->current;
19903 parser_lex(parser);
19904
19905 if (match1(parser, PM_TOKEN_STRING_END)) {
19906 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19907 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19908 parser_lex(parser);
19909 return node;
19910 }
19911
19912 // If we get here, then we have interpolation so we'll need to
19913 // create a string node with interpolation.
19914 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19915
19916 pm_token_t opening = not_provided(parser);
19917 pm_token_t closing = not_provided(parser);
19918
19919 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
19920 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19921
19922 pm_interpolated_xstring_node_append(node, part);
19923 } else {
19924 // If the first part of the body of the string is not a string
19925 // content, then we have interpolation and we need to create an
19926 // interpolated string node.
19927 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19928 }
19929
19930 pm_node_t *part;
19931 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19932 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19933 pm_interpolated_xstring_node_append(node, part);
19934 }
19935 }
19936
19937 pm_token_t closing = parser->current;
19938 if (match1(parser, PM_TOKEN_EOF)) {
19939 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19940 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19941 } else {
19942 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19943 }
19944 pm_interpolated_xstring_node_closing_set(node, &closing);
19945
19946 return UP(node);
19947 }
19948 case PM_TOKEN_USTAR: {
19949 parser_lex(parser);
19950
19951 // * operators at the beginning of expressions are only valid in the
19952 // context of a multiple assignment. We enforce that here. We'll
19953 // still lex past it though and create a missing node place.
19954 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19955 pm_parser_err_prefix(parser, diag_id);
19956 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
19957 }
19958
19959 pm_token_t operator = parser->previous;
19960 pm_node_t *name = NULL;
19961
19962 if (token_begins_expression_p(parser->current.type)) {
19963 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19964 }
19965
19966 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
19967
19968 if (match1(parser, PM_TOKEN_COMMA)) {
19969 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19970 } else {
19971 return parse_target_validate(parser, splat, true);
19972 }
19973 }
19974 case PM_TOKEN_BANG: {
19975 if (binding_power > PM_BINDING_POWER_UNARY) {
19976 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19977 }
19978
19979 parser_lex(parser);
19980
19981 pm_token_t operator = parser->previous;
19982 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19983 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
19984
19985 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
19986 return UP(node);
19987 }
19988 case PM_TOKEN_TILDE: {
19989 if (binding_power > PM_BINDING_POWER_UNARY) {
19990 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19991 }
19992 parser_lex(parser);
19993
19994 pm_token_t operator = parser->previous;
19995 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19996 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
19997
19998 return UP(node);
19999 }
20000 case PM_TOKEN_UMINUS: {
20001 if (binding_power > PM_BINDING_POWER_UNARY) {
20002 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20003 }
20004 parser_lex(parser);
20005
20006 pm_token_t operator = parser->previous;
20007 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20008 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20009
20010 return UP(node);
20011 }
20012 case PM_TOKEN_UMINUS_NUM: {
20013 parser_lex(parser);
20014
20015 pm_token_t operator = parser->previous;
20016 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20017
20018 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20019 pm_token_t exponent_operator = parser->previous;
20020 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20021 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20022 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20023 } else {
20024 switch (PM_NODE_TYPE(node)) {
20025 case PM_INTEGER_NODE:
20026 case PM_FLOAT_NODE:
20027 case PM_RATIONAL_NODE:
20028 case PM_IMAGINARY_NODE:
20029 parse_negative_numeric(node);
20030 break;
20031 default:
20032 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20033 break;
20034 }
20035 }
20036
20037 return node;
20038 }
20039 case PM_TOKEN_MINUS_GREATER: {
20040 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20042
20043 size_t opening_newline_index = token_newline_index(parser);
20044 pm_accepts_block_stack_push(parser, true);
20045 parser_lex(parser);
20046
20047 pm_token_t operator = parser->previous;
20048 pm_parser_scope_push(parser, false);
20049
20050 pm_block_parameters_node_t *block_parameters;
20051
20052 switch (parser->current.type) {
20053 case PM_TOKEN_PARENTHESIS_LEFT: {
20054 pm_token_t opening = parser->current;
20055 parser_lex(parser);
20056
20057 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20058 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20059 } else {
20060 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20061 }
20062
20063 accept1(parser, PM_TOKEN_NEWLINE);
20064 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20065
20066 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20067 break;
20068 }
20069 case PM_CASE_PARAMETER: {
20070 pm_accepts_block_stack_push(parser, false);
20071 pm_token_t opening = not_provided(parser);
20072 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20073 pm_accepts_block_stack_pop(parser);
20074 break;
20075 }
20076 default: {
20077 block_parameters = NULL;
20078 break;
20079 }
20080 }
20081
20082 pm_token_t opening;
20083 pm_node_t *body = NULL;
20084 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20085
20086 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20087 opening = parser->previous;
20088
20089 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20090 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20091 }
20092
20093 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20094 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20095 } else {
20096 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20097 opening = parser->previous;
20098
20099 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20100 pm_accepts_block_stack_push(parser, true);
20101 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20102 pm_accepts_block_stack_pop(parser);
20103 }
20104
20105 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20106 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20107 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20108 } else {
20109 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20110 }
20111
20112 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20113 }
20114
20115 pm_constant_id_list_t locals;
20116 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20117 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20118
20119 pm_parser_scope_pop(parser);
20120 pm_accepts_block_stack_pop(parser);
20121
20122 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20123 }
20124 case PM_TOKEN_UPLUS: {
20125 if (binding_power > PM_BINDING_POWER_UNARY) {
20126 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20127 }
20128 parser_lex(parser);
20129
20130 pm_token_t operator = parser->previous;
20131 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20132 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20133
20134 return UP(node);
20135 }
20136 case PM_TOKEN_STRING_BEGIN:
20137 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20138 case PM_TOKEN_SYMBOL_BEGIN: {
20139 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20140 parser_lex(parser);
20141
20142 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20143 }
20144 default: {
20145 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20146
20147 if (recoverable != PM_CONTEXT_NONE) {
20148 parser->recovering = true;
20149
20150 // If the given error is not the generic one, then we'll add it
20151 // here because it will provide more context in addition to the
20152 // recoverable error that we will also add.
20153 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20154 pm_parser_err_prefix(parser, diag_id);
20155 }
20156
20157 // If we get here, then we are assuming this token is closing a
20158 // parent context, so we'll indicate that to the user so that
20159 // they know how we behaved.
20160 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20161 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20162 // We're going to make a special case here, because "cannot
20163 // parse expression" is pretty generic, and we know here that we
20164 // have an unexpected token.
20165 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20166 } else {
20167 pm_parser_err_prefix(parser, diag_id);
20168 }
20169
20170 return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
20171 }
20172 }
20173}
20174
20184static pm_node_t *
20185parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20186 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20187
20188 // Contradicting binding powers, the right-hand-side value of the assignment
20189 // allows the `rescue` modifier.
20190 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20191 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20192
20193 pm_token_t rescue = parser->current;
20194 parser_lex(parser);
20195
20196 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20197 context_pop(parser);
20198
20199 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20200 }
20201
20202 return value;
20203}
20204
20209static void
20210parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20211 switch (PM_NODE_TYPE(node)) {
20212 case PM_BEGIN_NODE: {
20213 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20214 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20215 break;
20216 }
20217 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20219 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20220 break;
20221 }
20222 case PM_PARENTHESES_NODE: {
20223 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20224 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20225 break;
20226 }
20227 case PM_STATEMENTS_NODE: {
20228 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20229 const pm_node_t *statement;
20230
20231 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20232 parse_assignment_value_local(parser, statement);
20233 }
20234 break;
20235 }
20236 default:
20237 break;
20238 }
20239}
20240
20253static pm_node_t *
20254parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20255 bool permitted = true;
20256 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20257
20258 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20259 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20260
20261 parse_assignment_value_local(parser, value);
20262 bool single_value = true;
20263
20264 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20265 single_value = false;
20266
20267 pm_token_t opening = not_provided(parser);
20268 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20269
20270 pm_array_node_elements_append(array, value);
20271 value = UP(array);
20272
20273 while (accept1(parser, PM_TOKEN_COMMA)) {
20274 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20275
20276 pm_array_node_elements_append(array, element);
20277 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20278
20279 parse_assignment_value_local(parser, element);
20280 }
20281 }
20282
20283 // Contradicting binding powers, the right-hand-side value of the assignment
20284 // allows the `rescue` modifier.
20285 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20286 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20287
20288 pm_token_t rescue = parser->current;
20289 parser_lex(parser);
20290
20291 bool accepts_command_call_inner = false;
20292
20293 // RHS can accept command call iff the value is a call with arguments
20294 // but without parenthesis.
20295 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20296 pm_call_node_t *call_node = (pm_call_node_t *) value;
20297 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20298 accepts_command_call_inner = true;
20299 }
20300 }
20301
20302 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20303 context_pop(parser);
20304
20305 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20306 }
20307
20308 return value;
20309}
20310
20318static void
20319parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20320 if (call_node->arguments != NULL) {
20321 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20322 pm_node_unreference(parser, UP(call_node->arguments));
20323 pm_node_destroy(parser, UP(call_node->arguments));
20324 call_node->arguments = NULL;
20325 }
20326
20327 if (call_node->block != NULL) {
20328 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20329 pm_node_unreference(parser, UP(call_node->block));
20330 pm_node_destroy(parser, UP(call_node->block));
20331 call_node->block = NULL;
20332 }
20333}
20334
20359
20360static inline const uint8_t *
20361pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20362 cursor++;
20363
20364 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20365 uint8_t value = escape_hexadecimal_digit(*cursor);
20366 cursor++;
20367
20368 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20369 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20370 cursor++;
20371 }
20372
20373 pm_buffer_append_byte(unescaped, value);
20374 } else {
20375 pm_buffer_append_string(unescaped, "\\x", 2);
20376 }
20377
20378 return cursor;
20379}
20380
20381static inline const uint8_t *
20382pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20383 uint8_t value = (uint8_t) (*cursor - '0');
20384 cursor++;
20385
20386 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20387 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20388 cursor++;
20389
20390 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20391 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20392 cursor++;
20393 }
20394 }
20395
20396 pm_buffer_append_byte(unescaped, value);
20397 return cursor;
20398}
20399
20400static inline const uint8_t *
20401pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20402 const uint8_t *start = cursor - 1;
20403 cursor++;
20404
20405 if (cursor >= end) {
20406 pm_buffer_append_string(unescaped, "\\u", 2);
20407 return cursor;
20408 }
20409
20410 if (*cursor != '{') {
20411 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20412 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20413
20414 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20415 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20416 }
20417
20418 return cursor + length;
20419 }
20420
20421 cursor++;
20422 for (;;) {
20423 while (cursor < end && *cursor == ' ') cursor++;
20424
20425 if (cursor >= end) break;
20426 if (*cursor == '}') {
20427 cursor++;
20428 break;
20429 }
20430
20431 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20432 if (length == 0) {
20433 break;
20434 }
20435 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20436
20437 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20438 cursor += length;
20439 }
20440
20441 return cursor;
20442}
20443
20444static void
20445pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20446 const uint8_t *end = source + length;
20447 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20448
20449 for (;;) {
20450 if (++cursor >= end) {
20451 pm_buffer_append_byte(unescaped, '\\');
20452 return;
20453 }
20454
20455 switch (*cursor) {
20456 case 'x':
20457 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20458 break;
20459 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20460 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20461 break;
20462 case 'u':
20463 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20464 break;
20465 default:
20466 pm_buffer_append_byte(unescaped, '\\');
20467 break;
20468 }
20469
20470 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20471 if (next_cursor == NULL) break;
20472
20473 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20474 cursor = next_cursor;
20475 }
20476
20477 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20478}
20479
20484static void
20485parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20487
20488 pm_parser_t *parser = callback_data->parser;
20489 pm_call_node_t *call = callback_data->call;
20490 pm_constant_id_list_t *names = &callback_data->names;
20491
20492 const uint8_t *source = pm_string_source(capture);
20493 size_t length = pm_string_length(capture);
20494 pm_buffer_t unescaped = { 0 };
20495
20496 // First, we need to handle escapes within the name of the capture group.
20497 // This is because regular expressions have three different representations
20498 // in prism. The first is the plain source code. The second is the
20499 // representation that will be sent to the regular expression engine, which
20500 // is the value of the "unescaped" field. This is poorly named, because it
20501 // actually still contains escapes, just a subset of them that the regular
20502 // expression engine knows how to handle. The third representation is fully
20503 // unescaped, which is what we need.
20504 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20505 if (PRISM_UNLIKELY(cursor != NULL)) {
20506 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
20507 source = (const uint8_t *) pm_buffer_value(&unescaped);
20508 length = pm_buffer_length(&unescaped);
20509 }
20510
20511 pm_location_t location;
20512 pm_constant_id_t name;
20513
20514 // If the name of the capture group isn't a valid identifier, we do
20515 // not add it to the local table.
20516 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20517 pm_buffer_free(&unescaped);
20518 return;
20519 }
20520
20521 if (callback_data->shared) {
20522 // If the unescaped string is a slice of the source, then we can
20523 // copy the names directly. The pointers will line up.
20524 location = (pm_location_t) { .start = source, .end = source + length };
20525 name = pm_parser_constant_id_location(parser, location.start, location.end);
20526 } else {
20527 // Otherwise, the name is a slice of the malloc-ed owned string,
20528 // in which case we need to copy it out into a new string.
20529 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20530
20531 void *memory = xmalloc(length);
20532 if (memory == NULL) abort();
20533
20534 memcpy(memory, source, length);
20535 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20536 }
20537
20538 // Add this name to the list of constants if it is valid, not duplicated,
20539 // and not a keyword.
20540 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20541 pm_constant_id_list_append(names, name);
20542
20543 int depth;
20544 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20545 // If the local is not already a local but it is a keyword, then we
20546 // do not want to add a capture for this.
20547 if (pm_local_is_keyword((const char *) source, length)) {
20548 pm_buffer_free(&unescaped);
20549 return;
20550 }
20551
20552 // If the identifier is not already a local, then we will add it to
20553 // the local table.
20554 pm_parser_local_add(parser, name, location.start, location.end, 0);
20555 }
20556
20557 // Here we lazily create the MatchWriteNode since we know we're
20558 // about to add a target.
20559 if (callback_data->match == NULL) {
20560 callback_data->match = pm_match_write_node_create(parser, call);
20561 }
20562
20563 // Next, create the local variable target and add it to the list of
20564 // targets for the match.
20565 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth));
20566 pm_node_list_append(&callback_data->match->targets, target);
20567 }
20568
20569 pm_buffer_free(&unescaped);
20570}
20571
20576static pm_node_t *
20577parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20579 .parser = parser,
20580 .call = call,
20581 .names = { 0 },
20582 .shared = content->type == PM_STRING_SHARED
20583 };
20584
20586 .parser = parser,
20587 .start = call->receiver->location.start,
20588 .end = call->receiver->location.end,
20589 .shared = content->type == PM_STRING_SHARED
20590 };
20591
20592 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20593 pm_constant_id_list_free(&callback_data.names);
20594
20595 if (callback_data.match != NULL) {
20596 return UP(callback_data.match);
20597 } else {
20598 return UP(call);
20599 }
20600}
20601
20602static inline pm_node_t *
20603parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20604 pm_token_t token = parser->current;
20605
20606 switch (token.type) {
20607 case PM_TOKEN_EQUAL: {
20608 switch (PM_NODE_TYPE(node)) {
20609 case PM_CALL_NODE: {
20610 // If we have no arguments to the call node and we need this
20611 // to be a target then this is either a method call or a
20612 // local variable write. This _must_ happen before the value
20613 // is parsed because it could be referenced in the value.
20614 pm_call_node_t *call_node = (pm_call_node_t *) node;
20615 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20616 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20617 }
20618 }
20620 case PM_CASE_WRITABLE: {
20621 // When we have `it = value`, we need to add `it` as a local
20622 // variable before parsing the value, in case the value
20623 // references the variable.
20624 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20625 pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
20626 }
20627
20628 parser_lex(parser);
20629 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20630
20631 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20632 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20633 }
20634
20635 return parse_write(parser, node, &token, value);
20636 }
20637 case PM_SPLAT_NODE: {
20638 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20639 pm_multi_target_node_targets_append(parser, multi_target, node);
20640
20641 parser_lex(parser);
20642 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20643 return parse_write(parser, UP(multi_target), &token, value);
20644 }
20645 case PM_SOURCE_ENCODING_NODE:
20646 case PM_FALSE_NODE:
20647 case PM_SOURCE_FILE_NODE:
20648 case PM_SOURCE_LINE_NODE:
20649 case PM_NIL_NODE:
20650 case PM_SELF_NODE:
20651 case PM_TRUE_NODE: {
20652 // In these special cases, we have specific error messages
20653 // and we will replace them with local variable writes.
20654 parser_lex(parser);
20655 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20656 return parse_unwriteable_write(parser, node, &token, value);
20657 }
20658 default:
20659 // In this case we have an = sign, but we don't know what
20660 // it's for. We need to treat it as an error. We'll mark it
20661 // as an error and skip past it.
20662 parser_lex(parser);
20663 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20664 return node;
20665 }
20666 }
20667 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20668 switch (PM_NODE_TYPE(node)) {
20669 case PM_BACK_REFERENCE_READ_NODE:
20670 case PM_NUMBERED_REFERENCE_READ_NODE:
20671 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20673 case PM_GLOBAL_VARIABLE_READ_NODE: {
20674 parser_lex(parser);
20675
20676 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20677 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20678
20679 pm_node_destroy(parser, node);
20680 return result;
20681 }
20682 case PM_CLASS_VARIABLE_READ_NODE: {
20683 parser_lex(parser);
20684
20685 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20686 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20687
20688 pm_node_destroy(parser, node);
20689 return result;
20690 }
20691 case PM_CONSTANT_PATH_NODE: {
20692 parser_lex(parser);
20693
20694 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20695 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20696
20697 return parse_shareable_constant_write(parser, write);
20698 }
20699 case PM_CONSTANT_READ_NODE: {
20700 parser_lex(parser);
20701
20702 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20703 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20704
20705 pm_node_destroy(parser, node);
20706 return parse_shareable_constant_write(parser, write);
20707 }
20708 case PM_INSTANCE_VARIABLE_READ_NODE: {
20709 parser_lex(parser);
20710
20711 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20712 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20713
20714 pm_node_destroy(parser, node);
20715 return result;
20716 }
20717 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20718 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20719 parser_lex(parser);
20720
20721 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20722 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20723
20724 pm_node_unreference(parser, node);
20725 pm_node_destroy(parser, node);
20726 return result;
20727 }
20728 case PM_LOCAL_VARIABLE_READ_NODE: {
20729 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20730 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20731 pm_node_unreference(parser, node);
20732 }
20733
20735 parser_lex(parser);
20736
20737 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20738 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20739
20740 pm_node_destroy(parser, node);
20741 return result;
20742 }
20743 case PM_CALL_NODE: {
20744 pm_call_node_t *cast = (pm_call_node_t *) node;
20745
20746 // If we have a vcall (a method with no arguments and no
20747 // receiver that could have been a local variable) then we
20748 // will transform it into a local variable write.
20749 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20750 pm_location_t *message_loc = &cast->message_loc;
20751 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20752
20753 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20754 parser_lex(parser);
20755
20756 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20757 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20758
20759 pm_node_destroy(parser, UP(cast));
20760 return result;
20761 }
20762
20763 // Move past the token here so that we have already added
20764 // the local variable by this point.
20765 parser_lex(parser);
20766
20767 // If there is no call operator and the message is "[]" then
20768 // this is an aref expression, and we can transform it into
20769 // an aset expression.
20770 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20771 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20772 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20773 }
20774
20775 // If this node cannot be writable, then we have an error.
20776 if (pm_call_node_writable_p(parser, cast)) {
20777 parse_write_name(parser, &cast->name);
20778 } else {
20779 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20780 }
20781
20782 parse_call_operator_write(parser, cast, &token);
20783 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20784 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20785 }
20786 case PM_MULTI_WRITE_NODE: {
20787 parser_lex(parser);
20788 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20789 return node;
20790 }
20791 default:
20792 parser_lex(parser);
20793
20794 // In this case we have an &&= sign, but we don't know what it's for.
20795 // We need to treat it as an error. For now, we'll mark it as an error
20796 // and just skip right past it.
20797 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20798 return node;
20799 }
20800 }
20801 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20802 switch (PM_NODE_TYPE(node)) {
20803 case PM_BACK_REFERENCE_READ_NODE:
20804 case PM_NUMBERED_REFERENCE_READ_NODE:
20805 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20807 case PM_GLOBAL_VARIABLE_READ_NODE: {
20808 parser_lex(parser);
20809
20810 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20811 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20812
20813 pm_node_destroy(parser, node);
20814 return result;
20815 }
20816 case PM_CLASS_VARIABLE_READ_NODE: {
20817 parser_lex(parser);
20818
20819 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20820 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20821
20822 pm_node_destroy(parser, node);
20823 return result;
20824 }
20825 case PM_CONSTANT_PATH_NODE: {
20826 parser_lex(parser);
20827
20828 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20829 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20830
20831 return parse_shareable_constant_write(parser, write);
20832 }
20833 case PM_CONSTANT_READ_NODE: {
20834 parser_lex(parser);
20835
20836 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20837 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20838
20839 pm_node_destroy(parser, node);
20840 return parse_shareable_constant_write(parser, write);
20841 }
20842 case PM_INSTANCE_VARIABLE_READ_NODE: {
20843 parser_lex(parser);
20844
20845 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20846 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20847
20848 pm_node_destroy(parser, node);
20849 return result;
20850 }
20851 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20852 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20853 parser_lex(parser);
20854
20855 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20856 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20857
20858 pm_node_unreference(parser, node);
20859 pm_node_destroy(parser, node);
20860 return result;
20861 }
20862 case PM_LOCAL_VARIABLE_READ_NODE: {
20863 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
20864 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
20865 pm_node_unreference(parser, node);
20866 }
20867
20869 parser_lex(parser);
20870
20871 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20872 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20873
20874 pm_node_destroy(parser, node);
20875 return result;
20876 }
20877 case PM_CALL_NODE: {
20878 pm_call_node_t *cast = (pm_call_node_t *) node;
20879
20880 // If we have a vcall (a method with no arguments and no
20881 // receiver that could have been a local variable) then we
20882 // will transform it into a local variable write.
20883 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20884 pm_location_t *message_loc = &cast->message_loc;
20885 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
20886
20887 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
20888 parser_lex(parser);
20889
20890 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20891 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20892
20893 pm_node_destroy(parser, UP(cast));
20894 return result;
20895 }
20896
20897 // Move past the token here so that we have already added
20898 // the local variable by this point.
20899 parser_lex(parser);
20900
20901 // If there is no call operator and the message is "[]" then
20902 // this is an aref expression, and we can transform it into
20903 // an aset expression.
20904 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20905 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20906 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20907 }
20908
20909 // If this node cannot be writable, then we have an error.
20910 if (pm_call_node_writable_p(parser, cast)) {
20911 parse_write_name(parser, &cast->name);
20912 } else {
20913 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20914 }
20915
20916 parse_call_operator_write(parser, cast, &token);
20917 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20918 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20919 }
20920 case PM_MULTI_WRITE_NODE: {
20921 parser_lex(parser);
20922 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20923 return node;
20924 }
20925 default:
20926 parser_lex(parser);
20927
20928 // In this case we have an ||= sign, but we don't know what it's for.
20929 // We need to treat it as an error. For now, we'll mark it as an error
20930 // and just skip right past it.
20931 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20932 return node;
20933 }
20934 }
20935 case PM_TOKEN_AMPERSAND_EQUAL:
20936 case PM_TOKEN_CARET_EQUAL:
20937 case PM_TOKEN_GREATER_GREATER_EQUAL:
20938 case PM_TOKEN_LESS_LESS_EQUAL:
20939 case PM_TOKEN_MINUS_EQUAL:
20940 case PM_TOKEN_PERCENT_EQUAL:
20941 case PM_TOKEN_PIPE_EQUAL:
20942 case PM_TOKEN_PLUS_EQUAL:
20943 case PM_TOKEN_SLASH_EQUAL:
20944 case PM_TOKEN_STAR_EQUAL:
20945 case PM_TOKEN_STAR_STAR_EQUAL: {
20946 switch (PM_NODE_TYPE(node)) {
20947 case PM_BACK_REFERENCE_READ_NODE:
20948 case PM_NUMBERED_REFERENCE_READ_NODE:
20949 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20951 case PM_GLOBAL_VARIABLE_READ_NODE: {
20952 parser_lex(parser);
20953
20954 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20955 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20956
20957 pm_node_destroy(parser, node);
20958 return result;
20959 }
20960 case PM_CLASS_VARIABLE_READ_NODE: {
20961 parser_lex(parser);
20962
20963 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20964 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20965
20966 pm_node_destroy(parser, node);
20967 return result;
20968 }
20969 case PM_CONSTANT_PATH_NODE: {
20970 parser_lex(parser);
20971
20972 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20973 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20974
20975 return parse_shareable_constant_write(parser, write);
20976 }
20977 case PM_CONSTANT_READ_NODE: {
20978 parser_lex(parser);
20979
20980 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20981 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20982
20983 pm_node_destroy(parser, node);
20984 return parse_shareable_constant_write(parser, write);
20985 }
20986 case PM_INSTANCE_VARIABLE_READ_NODE: {
20987 parser_lex(parser);
20988
20989 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20990 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20991
20992 pm_node_destroy(parser, node);
20993 return result;
20994 }
20995 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20996 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20997 parser_lex(parser);
20998
20999 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21000 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21001
21002 pm_node_unreference(parser, node);
21003 pm_node_destroy(parser, node);
21004 return result;
21005 }
21006 case PM_LOCAL_VARIABLE_READ_NODE: {
21007 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21008 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21009 pm_node_unreference(parser, node);
21010 }
21011
21013 parser_lex(parser);
21014
21015 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21016 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21017
21018 pm_node_destroy(parser, node);
21019 return result;
21020 }
21021 case PM_CALL_NODE: {
21022 parser_lex(parser);
21023 pm_call_node_t *cast = (pm_call_node_t *) node;
21024
21025 // If we have a vcall (a method with no arguments and no
21026 // receiver that could have been a local variable) then we
21027 // will transform it into a local variable write.
21028 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21029 pm_location_t *message_loc = &cast->message_loc;
21030 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21031
21032 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21033 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21034 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21035
21036 pm_node_destroy(parser, UP(cast));
21037 return result;
21038 }
21039
21040 // If there is no call operator and the message is "[]" then
21041 // this is an aref expression, and we can transform it into
21042 // an aset expression.
21043 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21044 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21045 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21046 }
21047
21048 // If this node cannot be writable, then we have an error.
21049 if (pm_call_node_writable_p(parser, cast)) {
21050 parse_write_name(parser, &cast->name);
21051 } else {
21052 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21053 }
21054
21055 parse_call_operator_write(parser, cast, &token);
21056 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21057 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21058 }
21059 case PM_MULTI_WRITE_NODE: {
21060 parser_lex(parser);
21061 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21062 return node;
21063 }
21064 default:
21065 parser_lex(parser);
21066
21067 // In this case we have an operator but we don't know what it's for.
21068 // We need to treat it as an error. For now, we'll mark it as an error
21069 // and just skip right past it.
21070 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21071 return node;
21072 }
21073 }
21074 case PM_TOKEN_AMPERSAND_AMPERSAND:
21075 case PM_TOKEN_KEYWORD_AND: {
21076 parser_lex(parser);
21077
21078 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21079 return UP(pm_and_node_create(parser, node, &token, right));
21080 }
21081 case PM_TOKEN_KEYWORD_OR:
21082 case PM_TOKEN_PIPE_PIPE: {
21083 parser_lex(parser);
21084
21085 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21086 return UP(pm_or_node_create(parser, node, &token, right));
21087 }
21088 case PM_TOKEN_EQUAL_TILDE: {
21089 // Note that we _must_ parse the value before adding the local
21090 // variables in order to properly mirror the behavior of Ruby. For
21091 // example,
21092 //
21093 // /(?<foo>bar)/ =~ foo
21094 //
21095 // In this case, `foo` should be a method call and not a local yet.
21096 parser_lex(parser);
21097 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21098
21099 // By default, we're going to create a call node and then return it.
21100 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21101 pm_node_t *result = UP(call);
21102
21103 // If the receiver of this =~ is a regular expression node, then we
21104 // need to introduce local variables for it based on its named
21105 // capture groups.
21106 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21107 // It's possible to have an interpolated regular expression node
21108 // that only contains strings. This is because it can be split
21109 // up by a heredoc. In this case we need to concat the unescaped
21110 // strings together and then parse them as a regular expression.
21112
21113 bool interpolated = false;
21114 size_t total_length = 0;
21115
21116 pm_node_t *part;
21117 PM_NODE_LIST_FOREACH(parts, index, part) {
21118 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21119 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21120 } else {
21121 interpolated = true;
21122 break;
21123 }
21124 }
21125
21126 if (!interpolated && total_length > 0) {
21127 void *memory = xmalloc(total_length);
21128 if (!memory) abort();
21129
21130 uint8_t *cursor = memory;
21131 PM_NODE_LIST_FOREACH(parts, index, part) {
21132 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21133 size_t length = pm_string_length(unescaped);
21134
21135 memcpy(cursor, pm_string_source(unescaped), length);
21136 cursor += length;
21137 }
21138
21139 pm_string_t owned;
21140 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21141
21142 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21143 pm_string_free(&owned);
21144 }
21145 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21146 // If we have a regular expression node, then we can just parse
21147 // the named captures directly off the unescaped string.
21148 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21149 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21150 }
21151
21152 return result;
21153 }
21154 case PM_TOKEN_UAMPERSAND:
21155 case PM_TOKEN_USTAR:
21156 case PM_TOKEN_USTAR_STAR:
21157 // The only times this will occur are when we are in an error state,
21158 // but we'll put them in here so that errors can propagate.
21159 case PM_TOKEN_BANG_EQUAL:
21160 case PM_TOKEN_BANG_TILDE:
21161 case PM_TOKEN_EQUAL_EQUAL:
21162 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21163 case PM_TOKEN_LESS_EQUAL_GREATER:
21164 case PM_TOKEN_CARET:
21165 case PM_TOKEN_PIPE:
21166 case PM_TOKEN_AMPERSAND:
21167 case PM_TOKEN_GREATER_GREATER:
21168 case PM_TOKEN_LESS_LESS:
21169 case PM_TOKEN_MINUS:
21170 case PM_TOKEN_PLUS:
21171 case PM_TOKEN_PERCENT:
21172 case PM_TOKEN_SLASH:
21173 case PM_TOKEN_STAR:
21174 case PM_TOKEN_STAR_STAR: {
21175 parser_lex(parser);
21176 pm_token_t operator = parser->previous;
21177 switch (PM_NODE_TYPE(node)) {
21178 case PM_RESCUE_MODIFIER_NODE: {
21180 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21181 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21182 }
21183 break;
21184 }
21185 case PM_AND_NODE: {
21186 pm_and_node_t *cast = (pm_and_node_t *) node;
21187 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21188 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21189 }
21190 break;
21191 }
21192 case PM_OR_NODE: {
21193 pm_or_node_t *cast = (pm_or_node_t *) node;
21194 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21195 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21196 }
21197 break;
21198 }
21199 default:
21200 break;
21201 }
21202
21203 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21204 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21205 }
21206 case PM_TOKEN_GREATER:
21207 case PM_TOKEN_GREATER_EQUAL:
21208 case PM_TOKEN_LESS:
21209 case PM_TOKEN_LESS_EQUAL: {
21210 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21211 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21212 }
21213
21214 parser_lex(parser);
21215 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21216 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21217 }
21218 case PM_TOKEN_AMPERSAND_DOT:
21219 case PM_TOKEN_DOT: {
21220 parser_lex(parser);
21221 pm_token_t operator = parser->previous;
21222 pm_arguments_t arguments = { 0 };
21223
21224 // This if statement handles the foo.() syntax.
21225 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21226 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21227 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21228 }
21229
21230 switch (PM_NODE_TYPE(node)) {
21231 case PM_RESCUE_MODIFIER_NODE: {
21233 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21234 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21235 }
21236 break;
21237 }
21238 case PM_AND_NODE: {
21239 pm_and_node_t *cast = (pm_and_node_t *) node;
21240 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21241 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21242 }
21243 break;
21244 }
21245 case PM_OR_NODE: {
21246 pm_or_node_t *cast = (pm_or_node_t *) node;
21247 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21248 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21249 }
21250 break;
21251 }
21252 default:
21253 break;
21254 }
21255
21256 pm_token_t message;
21257
21258 switch (parser->current.type) {
21259 case PM_CASE_OPERATOR:
21260 case PM_CASE_KEYWORD:
21261 case PM_TOKEN_CONSTANT:
21262 case PM_TOKEN_IDENTIFIER:
21263 case PM_TOKEN_METHOD_NAME: {
21264 parser_lex(parser);
21265 message = parser->previous;
21266 break;
21267 }
21268 default: {
21269 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21270 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21271 }
21272 }
21273
21274 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21275 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21276
21277 if (
21278 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21279 arguments.arguments == NULL &&
21280 arguments.opening_loc.start == NULL &&
21281 match1(parser, PM_TOKEN_COMMA)
21282 ) {
21283 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21284 } else {
21285 return UP(call);
21286 }
21287 }
21288 case PM_TOKEN_DOT_DOT:
21289 case PM_TOKEN_DOT_DOT_DOT: {
21290 parser_lex(parser);
21291
21292 pm_node_t *right = NULL;
21293 if (token_begins_expression_p(parser->current.type)) {
21294 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21295 }
21296
21297 return UP(pm_range_node_create(parser, node, &token, right));
21298 }
21299 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21300 pm_token_t keyword = parser->current;
21301 parser_lex(parser);
21302
21303 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21304 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21305 }
21306 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21307 pm_token_t keyword = parser->current;
21308 parser_lex(parser);
21309
21310 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21311 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21312 }
21313 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21314 parser_lex(parser);
21315 pm_statements_node_t *statements = pm_statements_node_create(parser);
21316 pm_statements_node_body_append(parser, statements, node, true);
21317
21318 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21319 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21320 }
21321 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21322 parser_lex(parser);
21323 pm_statements_node_t *statements = pm_statements_node_create(parser);
21324 pm_statements_node_body_append(parser, statements, node, true);
21325
21326 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21327 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21328 }
21329 case PM_TOKEN_QUESTION_MARK: {
21330 context_push(parser, PM_CONTEXT_TERNARY);
21331 pm_node_list_t current_block_exits = { 0 };
21332 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21333
21334 pm_token_t qmark = parser->current;
21335 parser_lex(parser);
21336
21337 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21338
21339 if (parser->recovering) {
21340 // If parsing the true expression of this ternary resulted in a syntax
21341 // error that we can recover from, then we're going to put missing nodes
21342 // and tokens into the remaining places. We want to be sure to do this
21343 // before the `expect` function call to make sure it doesn't
21344 // accidentally move past a ':' token that occurs after the syntax
21345 // error.
21346 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21347 pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
21348
21349 context_pop(parser);
21350 pop_block_exits(parser, previous_block_exits);
21351 pm_node_list_free(&current_block_exits);
21352
21353 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21354 }
21355
21356 accept1(parser, PM_TOKEN_NEWLINE);
21357 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21358
21359 pm_token_t colon = parser->previous;
21360 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21361
21362 context_pop(parser);
21363 pop_block_exits(parser, previous_block_exits);
21364 pm_node_list_free(&current_block_exits);
21365
21366 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21367 }
21368 case PM_TOKEN_COLON_COLON: {
21369 parser_lex(parser);
21370 pm_token_t delimiter = parser->previous;
21371
21372 switch (parser->current.type) {
21373 case PM_TOKEN_CONSTANT: {
21374 parser_lex(parser);
21375 pm_node_t *path;
21376
21377 if (
21378 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21379 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21380 ) {
21381 // If we have a constant immediately following a '::' operator, then
21382 // this can either be a constant path or a method call, depending on
21383 // what follows the constant.
21384 //
21385 // If we have parentheses, then this is a method call. That would
21386 // look like Foo::Bar().
21387 pm_token_t message = parser->previous;
21388 pm_arguments_t arguments = { 0 };
21389
21390 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21391 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21392 } else {
21393 // Otherwise, this is a constant path. That would look like Foo::Bar.
21394 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21395 }
21396
21397 // If this is followed by a comma then it is a multiple assignment.
21398 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21399 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21400 }
21401
21402 return path;
21403 }
21404 case PM_CASE_OPERATOR:
21405 case PM_CASE_KEYWORD:
21406 case PM_TOKEN_IDENTIFIER:
21407 case PM_TOKEN_METHOD_NAME: {
21408 parser_lex(parser);
21409 pm_token_t message = parser->previous;
21410
21411 // If we have an identifier following a '::' operator, then it is for
21412 // sure a method call.
21413 pm_arguments_t arguments = { 0 };
21414 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21415 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21416
21417 // If this is followed by a comma then it is a multiple assignment.
21418 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21419 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21420 }
21421
21422 return UP(call);
21423 }
21424 case PM_TOKEN_PARENTHESIS_LEFT: {
21425 // If we have a parenthesis following a '::' operator, then it is the
21426 // method call shorthand. That would look like Foo::(bar).
21427 pm_arguments_t arguments = { 0 };
21428 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21429
21430 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21431 }
21432 default: {
21433 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21434 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21435 }
21436 }
21437 }
21438 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21439 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21440 parser_lex(parser);
21441 accept1(parser, PM_TOKEN_NEWLINE);
21442
21443 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21444 context_pop(parser);
21445
21446 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21447 }
21448 case PM_TOKEN_BRACKET_LEFT: {
21449 parser_lex(parser);
21450
21451 pm_arguments_t arguments = { 0 };
21452 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21453
21454 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21455 pm_accepts_block_stack_push(parser, true);
21456 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21457 pm_accepts_block_stack_pop(parser);
21458 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21459 }
21460
21461 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21462
21463 // If we have a comma after the closing bracket then this is a multiple
21464 // assignment and we should parse the targets.
21465 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21466 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21467 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21468 }
21469
21470 // If we're at the end of the arguments, we can now check if there is a
21471 // block node that starts with a {. If there is, then we can parse it and
21472 // add it to the arguments.
21473 pm_block_node_t *block = NULL;
21474 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21475 block = parse_block(parser, (uint16_t) (depth + 1));
21476 pm_arguments_validate_block(parser, &arguments, block);
21477 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21478 block = parse_block(parser, (uint16_t) (depth + 1));
21479 }
21480
21481 if (block != NULL) {
21482 if (arguments.block != NULL) {
21483 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21484 if (arguments.arguments == NULL) {
21485 arguments.arguments = pm_arguments_node_create(parser);
21486 }
21487 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21488 }
21489
21490 arguments.block = UP(block);
21491 }
21492
21493 return UP(pm_call_node_aref_create(parser, node, &arguments));
21494 }
21495 case PM_TOKEN_KEYWORD_IN: {
21496 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21497 parser->pattern_matching_newlines = true;
21498
21499 pm_token_t operator = parser->current;
21500 parser->command_start = false;
21501 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21502 parser_lex(parser);
21503
21504 pm_constant_id_list_t captures = { 0 };
21505 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21506
21507 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21508 pm_constant_id_list_free(&captures);
21509
21510 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21511 }
21512 case PM_TOKEN_EQUAL_GREATER: {
21513 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21514 parser->pattern_matching_newlines = true;
21515
21516 pm_token_t operator = parser->current;
21517 parser->command_start = false;
21518 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21519 parser_lex(parser);
21520
21521 pm_constant_id_list_t captures = { 0 };
21522 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21523
21524 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21525 pm_constant_id_list_free(&captures);
21526
21527 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21528 }
21529 default:
21530 assert(false && "unreachable");
21531 return NULL;
21532 }
21533}
21534
21535#undef PM_PARSE_PATTERN_SINGLE
21536#undef PM_PARSE_PATTERN_TOP
21537#undef PM_PARSE_PATTERN_MULTI
21538
21543static inline bool
21544pm_call_node_command_p(const pm_call_node_t *node) {
21545 return (
21546 (node->opening_loc.start == NULL) &&
21547 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21548 (node->arguments != NULL || node->block != NULL)
21549 );
21550}
21551
/**
 * Parse an expression starting at the current token.
 *
 * The prefix is parsed with parse_expression_prefix, and then infix operators
 * are folded in with parse_expression_infix for as long as the current token is
 * a binary operator whose left binding power is at least `binding_power`.
 *
 * @param parser The parser being driven.
 * @param binding_power The minimum left binding power an infix operator must
 *     have in order to be consumed by this call.
 * @param accepts_command_call Forwarded to the prefix/infix parsers; cleared
 *     inside the loop once the parsed node can no longer continue a method
 *     chain.
 * @param accepts_label Forwarded to parse_expression_prefix.
 * @param diag_id Forwarded to parse_expression_prefix.
 * @param depth The current nesting depth. When it reaches PRISM_DEPTH_MAXIMUM a
 *     PM_ERR_NESTING_TOO_DEEP error is recorded and a missing node is returned.
 * @return The parsed node.
 */
21560static pm_node_t *
21561parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
// Refuse to go any deeper once the nesting limit has been reached.
21562 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21563 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21564 return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
21565 }
21566
21567 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21568
// Some prefix results must be returned right away, without looking for an
// infix operator after them.
21569 switch (PM_NODE_TYPE(node)) {
21570 case PM_MISSING_NODE:
21571 // If we found a syntax error, then the type of node returned by
21572 // parse_expression_prefix is going to be a missing node.
21573 return node;
21574 case PM_PRE_EXECUTION_NODE:
21575 case PM_POST_EXECUTION_NODE:
21576 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21577 case PM_ALIAS_METHOD_NODE:
21578 case PM_MULTI_WRITE_NODE:
21579 case PM_UNDEF_NODE:
21580 // These expressions are statements, and cannot be followed by
21581 // operators (except modifiers).
21582 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21583 return node;
21584 }
21585 break;
21586 case PM_CALL_NODE:
21587 // If we have a call node, then we need to check if it looks like a
21588 // method call without parentheses that contains arguments. If it
21589 // does, then it has different rules for parsing infix operators,
21590 // namely that it only accepts composition (and/or) and modifiers
21591 // (if/unless/etc.).
21592 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21593 return node;
21594 }
21595 break;
21596 case PM_SYMBOL_NODE:
21597 // If we have a symbol node that is being parsed as a label, then we
21598 // need to immediately return, because there should never be an
21599 // infix operator following this node.
21600 if (pm_symbol_node_label_p(node)) {
21601 return node;
21602 }
21603 break;
21604 default:
21605 break;
21606 }
21607
21608 // Otherwise we'll look and see if the next token can be parsed as an infix
21609 // operator. If it can, then we'll parse it using parse_expression_infix.
21610 pm_binding_powers_t current_binding_powers;
21611 pm_token_type_t current_token_type;
21612
// The loop condition uses the comma operator: on every iteration it first
// snapshots the current token type and its binding powers, and only the final
// `&&` expression decides whether the loop continues.
21613 while (
21614 current_token_type = parser->current.type,
21615 current_binding_powers = pm_binding_powers[current_token_type],
21616 binding_power <= current_binding_powers.left &&
21617 current_binding_powers.binary
21618 ) {
21619 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21620
21621 if (context_terminator(parser->current_context->context, &parser->current)) {
21622 // If this token terminates the current context, then we need to
21623 // stop parsing the expression, as it has become a statement.
21624 return node;
21625 }
21626
21627 switch (PM_NODE_TYPE(node)) {
21628 case PM_MULTI_WRITE_NODE:
21629 // Multi-write nodes are statements, and cannot be followed by
21630 // operators except modifiers.
21631 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21632 return node;
21633 }
21634 break;
21635 case PM_CLASS_VARIABLE_WRITE_NODE:
21636 case PM_CONSTANT_PATH_WRITE_NODE:
21637 case PM_CONSTANT_WRITE_NODE:
21638 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21639 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21640 case PM_LOCAL_VARIABLE_WRITE_NODE:
21641 // These expressions are statements, by virtue of the right-hand
21642 // side of their write being an implicit array.
21643 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21644 return node;
21645 }
21646 break;
21647 case PM_CALL_NODE:
21648 // These expressions are also statements, by virtue of the
21649 // right-hand side of the expression (i.e., the last argument to
21650 // the call node) being an implicit array.
21651 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21652 return node;
21653 }
21654 break;
21655 default:
21656 break;
21657 }
21658
21659 // If the operator is nonassoc and we should not be able to parse the
21660 // upcoming infix operator, break.
21661 if (current_binding_powers.nonassoc) {
21662 // If this is a non-assoc operator and we are about to parse the
21663 // exact same operator, then we need to add an error.
21664 if (match1(parser, current_token_type)) {
21665 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21666 break;
21667 }
21668
21669 // If this is an endless range, then we need to reject a couple of
21670 // additional operators because it violates the normal operator
21671 // precedence rules. Those patterns are:
21672 //
21673 // 1.. & 2
21674 // 1.. * 2
21675 //
21676 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21677 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21678 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21679 break;
21680 }
21681
21682 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21683 break;
21684 }
21685 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21686 break;
21687 }
21688 }
21689
21690 if (accepts_command_call) {
21691 // A command-style method call is only accepted on method chains.
21692 // Thus, we check whether the parsed node can continue method chains.
21693 // The method chain can continue if the parsed node is one of the following five kinds:
21694 // (1) index access: foo[1]
21695 // (2) attribute access: foo.bar
21696 // (3) method call with parenthesis: foo.bar(1)
21697 // (4) method call with a block: foo.bar do end
21698 // (5) constant path: foo::Bar
21699 switch (node->type) {
21700 case PM_CALL_NODE: {
21701 pm_call_node_t *cast = (pm_call_node_t *)node;
21702 if (
21703 // (1) foo[1]
21704 !(
21705 cast->call_operator_loc.start == NULL &&
21706 cast->message_loc.start != NULL &&
21707 cast->message_loc.start[0] == '[' &&
21708 cast->message_loc.end[-1] == ']'
21709 ) &&
21710 // (2) foo.bar
21711 !(
21712 cast->call_operator_loc.start != NULL &&
21713 cast->arguments == NULL &&
21714 cast->block == NULL &&
21715 cast->opening_loc.start == NULL
21716 ) &&
21717 // (3) foo.bar(1)
21718 !(
21719 cast->call_operator_loc.start != NULL &&
21720 cast->opening_loc.start != NULL
21721 ) &&
21722 // (4) foo.bar do end
21723 !(
21724 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21725 )
21726 ) {
21727 accepts_command_call = false;
21728 }
21729 break;
21730 }
21731 // (5) foo::Bar
21732 case PM_CONSTANT_PATH_NODE:
21733 break;
21734 default:
21735 accepts_command_call = false;
21736 break;
21737 }
21738 }
21739 }
21740
21741 return node;
21742}
21743
21748static pm_statements_node_t *
21749wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21750 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21751 if (statements == NULL) {
21752 statements = pm_statements_node_create(parser);
21753 }
21754
21755 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21756 pm_arguments_node_arguments_append(
21757 arguments,
21758 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21759 );
21760
21761 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21762 parser,
21763 arguments,
21764 pm_parser_constant_id_constant(parser, "print", 5)
21765 )), true);
21766 }
21767
21768 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21769 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21770 if (statements == NULL) {
21771 statements = pm_statements_node_create(parser);
21772 }
21773
21774 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21775 pm_arguments_node_arguments_append(
21776 arguments,
21777 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21778 );
21779
21780 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21781 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21782
21783 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21784 parser,
21785 pm_parser_constant_id_constant(parser, "$F", 2),
21786 UP(call)
21787 );
21788
21789 pm_statements_node_body_prepend(statements, UP(write));
21790 }
21791
21792 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21793 pm_arguments_node_arguments_append(
21794 arguments,
21795 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21796 );
21797
21798 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21799 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21800 pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
21801 parser,
21802 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21803 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21804 UP(pm_true_node_synthesized_create(parser))
21805 )));
21806
21807 pm_arguments_node_arguments_append(arguments, UP(keywords));
21808 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21809 }
21810
21811 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21812 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21813 parser,
21814 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21815 statements
21816 )), true);
21817
21818 statements = wrapped_statements;
21819 }
21820
21821 return statements;
21822}
21823
21827static pm_node_t *
21828parse_program(pm_parser_t *parser) {
21829 // If the current scope is NULL, then we want to push a new top level scope.
21830 // The current scope could exist in the event that we are parsing an eval
21831 // and the user has passed into scopes that already exist.
21832 if (parser->current_scope == NULL) {
21833 pm_parser_scope_push(parser, true);
21834 }
21835
21836 pm_node_list_t current_block_exits = { 0 };
21837 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21838
21839 parser_lex(parser);
21840 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21841
21842 if (statements != NULL && !parser->parsing_eval) {
21843 // If we have statements, then the top-level statement should be
21844 // explicitly checked as well. We have to do this here because
21845 // everywhere else we check all but the last statement.
21846 assert(statements->body.size > 0);
21847 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21848 }
21849
21850 pm_constant_id_list_t locals;
21851 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21852 pm_parser_scope_pop(parser);
21853
21854 // At the top level, see if we need to wrap the statements in a program
21855 // node with a while loop based on the options.
21857 statements = wrap_statements(parser, statements);
21858 } else {
21859 flush_block_exits(parser, previous_block_exits);
21860 }
21861
21862 pm_node_list_free(&current_block_exits);
21863
21864 // If this is an empty file, then we're still going to parse all of the
21865 // statements in order to gather up all of the comments and such. Here we'll
21866 // correct the location information.
21867 if (statements == NULL) {
21868 statements = pm_statements_node_create(parser);
21869 pm_statements_node_location_set(statements, parser->start, parser->start);
21870 }
21871
21872 return UP(pm_program_node_create(parser, &locals, statements));
21873}
21874
21875/******************************************************************************/
21876/* External functions */
21877/******************************************************************************/
21878
/**
 * Locate the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be NUL-terminated.
 *
 * @param big The haystack to search.
 * @param little The NUL-terminated needle.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, `big` itself when `little` is
 *     empty, or NULL when there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very start. Handling it here also avoids
    // dereferencing the byte one past the end of the haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Bailing out early
    // keeps the pointer arithmetic below inside the haystack; without it the
    // last candidate position would be computed before the start of `big`.
    if (little_length > big_length) return NULL;

    for (const char *max = big + (big_length - little_length); big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21898
21899#ifdef _WIN32
21900#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21901#else
21907static void
21908pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21909 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21910 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
21911 }
21912}
21913#endif
21914
21919static void
21920pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21921 const char *switches = pm_strnstr(engine, " -", length);
21922 if (switches == NULL) return;
21923
21924 pm_options_t next_options = *options;
21925 options->shebang_callback(
21926 &next_options,
21927 (const uint8_t *) (switches + 1),
21928 length - ((size_t) (switches - engine)) - 1,
21929 options->shebang_callback_data
21930 );
21931
21932 size_t encoding_length;
21933 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21934 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
21935 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21936 }
21937
21938 parser->command_line = next_options.command_line;
21939 parser->frozen_string_literal = next_options.frozen_string_literal;
21940}
21941
21946pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
21947 assert(source != NULL);
21948
21949 *parser = (pm_parser_t) {
21950 .node_id = 0,
21951 .lex_state = PM_LEX_STATE_BEG,
21952 .enclosure_nesting = 0,
21953 .lambda_enclosure_nesting = -1,
21954 .brace_nesting = 0,
21955 .do_loop_stack = 0,
21956 .accepts_block_stack = 0,
21957 .lex_modes = {
21958 .index = 0,
21959 .stack = {{ .mode = PM_LEX_DEFAULT }},
21960 .current = &parser->lex_modes.stack[0],
21961 },
21962 .start = source,
21963 .end = source + size,
21964 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21965 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21966 .next_start = NULL,
21967 .heredoc_end = NULL,
21968 .data_loc = { .start = NULL, .end = NULL },
21969 .comment_list = { 0 },
21970 .magic_comment_list = { 0 },
21971 .warning_list = { 0 },
21972 .error_list = { 0 },
21973 .current_scope = NULL,
21974 .current_context = NULL,
21975 .encoding = PM_ENCODING_UTF_8_ENTRY,
21976 .encoding_changed_callback = NULL,
21977 .encoding_comment_start = source,
21978 .lex_callback = NULL,
21979 .filepath = { 0 },
21980 .constant_pool = { 0 },
21981 .newline_list = { 0 },
21982 .integer_base = 0,
21983 .current_string = PM_STRING_EMPTY,
21984 .start_line = 1,
21985 .explicit_encoding = NULL,
21986 .command_line = 0,
21987 .parsing_eval = false,
21988 .partial_script = false,
21989 .command_start = true,
21990 .recovering = false,
21991 .encoding_locked = false,
21992 .encoding_changed = false,
21993 .pattern_matching_newlines = false,
21994 .in_keyword_arg = false,
21995 .current_block_exits = NULL,
21996 .semantic_token_seen = false,
21997 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
21998 .current_regular_expression_ascii_only = false,
21999 .warn_mismatched_indentation = true
22000 };
22001
22002 // Initialize the constant pool. We're going to completely guess as to the
22003 // number of constants that we'll need based on the size of the input. The
22004 // ratio we chose here is actually less arbitrary than you might think.
22005 //
22006 // We took ~50K Ruby files and measured the size of the file versus the
22007 // number of constants that were found in those files. Then we found the
22008 // average and standard deviation of the ratios of constants/bytesize. Then
22009 // we added 1.34 standard deviations to the average to get a ratio that
22010 // would fit 75% of the files (for a two-tailed distribution). This works
22011 // because there was about a 0.77 correlation and the distribution was
22012 // roughly normal.
22013 //
22014 // This ratio will need to change if we add more constants to the constant
22015 // pool for another node type.
22016 uint32_t constant_size = ((uint32_t) size) / 95;
22017 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22018
22019 // Initialize the newline list. Similar to the constant pool, we're going to
22020 // guess at the number of newlines that we'll need based on the size of the
22021 // input.
22022 size_t newline_size = size / 22;
22023 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22024
22025 // If options were provided to this parse, establish them here.
22026 if (options != NULL) {
22027 // filepath option
22028 parser->filepath = options->filepath;
22029
22030 // line option
22031 parser->start_line = options->line;
22032
22033 // encoding option
22034 size_t encoding_length = pm_string_length(&options->encoding);
22035 if (encoding_length > 0) {
22036 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22037 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22038 }
22039
22040 // encoding_locked option
22041 parser->encoding_locked = options->encoding_locked;
22042
22043 // frozen_string_literal option
22045
22046 // command_line option
22047 parser->command_line = options->command_line;
22048
22049 // version option
22050 parser->version = options->version;
22051
22052 // partial_script
22053 parser->partial_script = options->partial_script;
22054
22055 // scopes option
22056 parser->parsing_eval = options->scopes_count > 0;
22057 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22058
22059 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22060 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22061 pm_parser_scope_push(parser, scope_index == 0);
22062
22063 // Scopes given from the outside are not allowed to have numbered
22064 // parameters.
22065 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22066
22067 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22068 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22069
22070 const uint8_t *source = pm_string_source(local);
22071 size_t length = pm_string_length(local);
22072
22073 void *allocated = xmalloc(length);
22074 if (allocated == NULL) continue;
22075
22076 memcpy(allocated, source, length);
22077 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22078 }
22079 }
22080 }
22081
22082 // Now that we have established the user-provided options, check if
22083 // a version was given and parse as the latest version otherwise.
22084 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22086 }
22087
22088 pm_accepts_block_stack_push(parser, true);
22089
22090 // Skip past the UTF-8 BOM if it exists.
22091 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22092 parser->current.end += 3;
22093 parser->encoding_comment_start += 3;
22094
22095 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22097 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22098 }
22099 }
22100
22101 // If the -x command line flag is set, or the first shebang of the file does
22102 // not include "ruby", then we'll search for a shebang that does include
22103 // "ruby" and start parsing from there.
22104 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22105
22106 // If the first two bytes of the source are a shebang, then we will do a bit
22107 // of extra processing.
22108 //
22109 // First, we'll indicate that the encoding comment is at the end of the
22110 // shebang. This means that when a shebang is present the encoding comment
22111 // can begin on the second line.
22112 //
22113 // Second, we will check if the shebang includes "ruby". If it does, then we
22114 // we will start parsing from there. We will also potentially warning the
22115 // user if there is a carriage return at the end of the shebang. We will
22116 // also potentially call the shebang callback if this is the main script to
22117 // allow the caller to parse the shebang and find any command-line options.
22118 // If the shebang does not include "ruby" and this is the main script being
22119 // parsed, then we will start searching the file for a shebang that does
22120 // contain "ruby" as if -x were passed on the command line.
22121 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22122 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22123
22124 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22125 const char *engine;
22126
22127 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22128 if (newline != NULL) {
22129 parser->encoding_comment_start = newline + 1;
22130
22131 if (options == NULL || options->main_script) {
22132 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22133 }
22134 }
22135
22136 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22137 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22138 }
22139
22140 search_shebang = false;
22141 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22142 search_shebang = true;
22143 }
22144 }
22145
22146 // Here we're going to find the first shebang that includes "ruby" and start
22147 // parsing from there.
22148 if (search_shebang) {
22149 // If a shebang that includes "ruby" is not found, then we're going to a
22150 // a load error to the list of errors on the parser.
22151 bool found_shebang = false;
22152
22153 // This is going to point to the start of each line as we check it.
22154 // We'll maintain a moving window looking at each line at they come.
22155 const uint8_t *cursor = parser->start;
22156
22157 // The newline pointer points to the end of the current line that we're
22158 // considering. If it is NULL, then we're at the end of the file.
22159 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22160
22161 while (newline != NULL) {
22162 pm_newline_list_append(&parser->newline_list, newline);
22163
22164 cursor = newline + 1;
22165 newline = next_newline(cursor, parser->end - cursor);
22166
22167 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22168 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22169 const char *engine;
22170 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22171 found_shebang = true;
22172
22173 if (newline != NULL) {
22174 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22175 parser->encoding_comment_start = newline + 1;
22176 }
22177
22178 if (options != NULL && options->shebang_callback != NULL) {
22179 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22180 }
22181
22182 break;
22183 }
22184 }
22185 }
22186
22187 if (found_shebang) {
22188 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22189 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22190 } else {
22191 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22192 pm_newline_list_clear(&parser->newline_list);
22193 }
22194 }
22195
22196 // The encoding comment can start after any amount of inline whitespace, so
22197 // here we'll advance it to the first non-inline-whitespace character so
22198 // that it is ready for future comparisons.
22199 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22200}
22201
22210
22214static inline void
22215pm_comment_list_free(pm_list_t *list) {
22216 pm_list_node_t *node, *next;
22217
22218 for (node = list->head; node != NULL; node = next) {
22219 next = node->next;
22220
22221 pm_comment_t *comment = (pm_comment_t *) node;
22222 xfree(comment);
22223 }
22224}
22225
22229static inline void
22230pm_magic_comment_list_free(pm_list_t *list) {
22231 pm_list_node_t *node, *next;
22232
22233 for (node = list->head; node != NULL; node = next) {
22234 next = node->next;
22235
22238 }
22239}
22240
22246 pm_string_free(&parser->filepath);
22247 pm_diagnostic_list_free(&parser->error_list);
22248 pm_diagnostic_list_free(&parser->warning_list);
22249 pm_comment_list_free(&parser->comment_list);
22250 pm_magic_comment_list_free(&parser->magic_comment_list);
22251 pm_constant_pool_free(&parser->constant_pool);
22252 pm_newline_list_free(&parser->newline_list);
22253
22254 while (parser->current_scope != NULL) {
22255 // Normally, popping the scope doesn't free the locals since it is
22256 // assumed that ownership has transferred to the AST. However if we have
22257 // scopes while we're freeing the parser, it's likely they came from
22258 // eval scopes and we need to free them explicitly here.
22259 pm_parser_scope_pop(parser);
22260 }
22261
22262 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22263 lex_mode_pop(parser);
22264 }
22265}
22266
22272 return parse_program(parser);
22273}
22274
22280static bool
22281pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22282#define LINE_SIZE 4096
22283 char line[LINE_SIZE];
22284
22285 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22286 size_t length = LINE_SIZE;
22287 while (length > 0 && line[length - 1] == '\n') length--;
22288
22289 if (length == LINE_SIZE) {
22290 // If we read a line that is the maximum size and it doesn't end
22291 // with a newline, then we'll just append it to the buffer and
22292 // continue reading.
22293 length--;
22294 pm_buffer_append_string(buffer, line, length);
22295 continue;
22296 }
22297
22298 // Append the line to the buffer.
22299 length--;
22300 pm_buffer_append_string(buffer, line, length);
22301
22302 // Check if the line matches the __END__ marker. If it does, then stop
22303 // reading and return false. In most circumstances, this means we should
22304 // stop reading from the stream so that the DATA constant can pick it
22305 // up.
22306 switch (length) {
22307 case 7:
22308 if (strncmp(line, "__END__", 7) == 0) return false;
22309 break;
22310 case 8:
22311 if (strncmp(line, "__END__\n", 8) == 0) return false;
22312 break;
22313 case 9:
22314 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22315 break;
22316 }
22317
22318 // All data should be read via gets. If the string returned by gets
22319 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22320 if (stream_feof(stream)) {
22321 break;
22322 }
22323 }
22324
22325 return true;
22326#undef LINE_SIZE
22327}
22328
22338static bool
22339pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22340 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22341
22342 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22343 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22344 return true;
22345 }
22346 }
22347
22348 return false;
22349}
22350
22358pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22359 pm_buffer_init(buffer);
22360
22361 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22362
22363 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22364 pm_node_t *node = pm_parse(parser);
22365
22366 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22367 pm_node_destroy(parser, node);
22368 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22369
22370 pm_parser_free(parser);
22371 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22372 node = pm_parse(parser);
22373 }
22374
22375 return node;
22376}
22377
22382pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22383 pm_options_t options = { 0 };
22384 pm_options_read(&options, data);
22385
22386 pm_parser_t parser;
22387 pm_parser_init(&parser, source, size, &options);
22388
22389 pm_node_t *node = pm_parse(&parser);
22390 pm_node_destroy(&parser, node);
22391
22392 bool result = parser.error_list.size == 0;
22393 pm_parser_free(&parser);
22394 pm_options_free(&options);
22395
22396 return result;
22397}
22398
22399#undef PM_CASE_KEYWORD
22400#undef PM_CASE_OPERATOR
22401#undef PM_CASE_WRITABLE
22402#undef PM_STRING_EMPTY
22403
22404// We optionally support serializing to a binary string. For systems that don't
22405// want or need this functionality, it can be turned off with the
22406// PRISM_EXCLUDE_SERIALIZATION define.
22407#ifndef PRISM_EXCLUDE_SERIALIZATION
22408
22409static inline void
22410pm_serialize_header(pm_buffer_t *buffer) {
22411 pm_buffer_append_string(buffer, "PRISM", 5);
22412 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22413 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22414 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22415 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22416}
22417
22422pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22423 pm_serialize_header(buffer);
22424 pm_serialize_content(parser, node, buffer);
22425 pm_buffer_append_byte(buffer, '\0');
22426}
22427
22433pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22434 pm_options_t options = { 0 };
22435 pm_options_read(&options, data);
22436
22437 pm_parser_t parser;
22438 pm_parser_init(&parser, source, size, &options);
22439
22440 pm_node_t *node = pm_parse(&parser);
22441
22442 pm_serialize_header(buffer);
22443 pm_serialize_content(&parser, node, buffer);
22444 pm_buffer_append_byte(buffer, '\0');
22445
22446 pm_node_destroy(&parser, node);
22447 pm_parser_free(&parser);
22448 pm_options_free(&options);
22449}
22450
22456pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22457 pm_parser_t parser;
22458 pm_options_t options = { 0 };
22459 pm_options_read(&options, data);
22460
22461 pm_buffer_t parser_buffer;
22462 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22463 pm_serialize_header(buffer);
22464 pm_serialize_content(&parser, node, buffer);
22465 pm_buffer_append_byte(buffer, '\0');
22466
22467 pm_node_destroy(&parser, node);
22468 pm_buffer_free(&parser_buffer);
22469 pm_parser_free(&parser);
22470 pm_options_free(&options);
22471}
22472
22477pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22478 pm_options_t options = { 0 };
22479 pm_options_read(&options, data);
22480
22481 pm_parser_t parser;
22482 pm_parser_init(&parser, source, size, &options);
22483
22484 pm_node_t *node = pm_parse(&parser);
22485 pm_serialize_header(buffer);
22486 pm_serialize_encoding(parser.encoding, buffer);
22487 pm_buffer_append_varsint(buffer, parser.start_line);
22488 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22489
22490 pm_node_destroy(&parser, node);
22491 pm_parser_free(&parser);
22492 pm_options_free(&options);
22493}
22494
22495#endif
22496
22497/******************************************************************************/
22498/* Slice queries for the Ruby API */
22499/******************************************************************************/
22500
/** The category that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** The requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is empty or is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22518
22522pm_slice_type_t
22523pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22524 // first, get the right encoding object
22525 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22526 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22527
22528 // check that there is at least one character
22529 if (length == 0) return PM_SLICE_TYPE_NONE;
22530
22531 size_t width;
22532 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22533 // valid because alphabetical
22534 } else if (*source == '_') {
22535 // valid because underscore
22536 width = 1;
22537 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22538 // valid because multibyte
22539 } else {
22540 // invalid because no match
22541 return PM_SLICE_TYPE_NONE;
22542 }
22543
22544 // determine the type of the slice based on the first character
22545 const uint8_t *end = source + length;
22546 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22547
22548 // next, iterate through all of the bytes of the string to ensure that they
22549 // are all valid identifier characters
22550 source += width;
22551
22552 while (source < end) {
22553 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22554 // valid because alphanumeric
22555 source += width;
22556 } else if (*source == '_') {
22557 // valid because underscore
22558 source++;
22559 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22560 // valid because multibyte
22561 source += width;
22562 } else {
22563 // invalid because no match
22564 break;
22565 }
22566 }
22567
22568 // accept a ! or ? at the end of the slice as a method name
22569 if (*source == '!' || *source == '?' || *source == '=') {
22570 source++;
22571 result = PM_SLICE_TYPE_METHOD_NAME;
22572 }
22573
22574 // valid if we are at the end of the slice
22575 return source == end ? result : PM_SLICE_TYPE_NONE;
22576}
22577
22582pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22583 switch (pm_slice_type(source, length, encoding_name)) {
22584 case PM_SLICE_TYPE_ERROR:
22585 return PM_STRING_QUERY_ERROR;
22586 case PM_SLICE_TYPE_NONE:
22587 case PM_SLICE_TYPE_CONSTANT:
22588 case PM_SLICE_TYPE_METHOD_NAME:
22589 return PM_STRING_QUERY_FALSE;
22590 case PM_SLICE_TYPE_LOCAL:
22591 return PM_STRING_QUERY_TRUE;
22592 }
22593
22594 assert(false && "unreachable");
22595 return PM_STRING_QUERY_FALSE;
22596}
22597
22602pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22603 switch (pm_slice_type(source, length, encoding_name)) {
22604 case PM_SLICE_TYPE_ERROR:
22605 return PM_STRING_QUERY_ERROR;
22606 case PM_SLICE_TYPE_NONE:
22607 case PM_SLICE_TYPE_LOCAL:
22608 case PM_SLICE_TYPE_METHOD_NAME:
22609 return PM_STRING_QUERY_FALSE;
22610 case PM_SLICE_TYPE_CONSTANT:
22611 return PM_STRING_QUERY_TRUE;
22612 }
22613
22614 assert(false && "unreachable");
22615 return PM_STRING_QUERY_FALSE;
22616}
22617
22622pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22623#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22624#define C1(c) (*source == c)
22625#define C2(s) (memcmp(source, s, 2) == 0)
22626#define C3(s) (memcmp(source, s, 3) == 0)
22627
22628 switch (pm_slice_type(source, length, encoding_name)) {
22629 case PM_SLICE_TYPE_ERROR:
22630 return PM_STRING_QUERY_ERROR;
22631 case PM_SLICE_TYPE_NONE:
22632 break;
22633 case PM_SLICE_TYPE_LOCAL:
22634 // numbered parameters are not valid method names
22635 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22636 case PM_SLICE_TYPE_CONSTANT:
22637 // all constants are valid method names
22638 case PM_SLICE_TYPE_METHOD_NAME:
22639 // all method names are valid method names
22640 return PM_STRING_QUERY_TRUE;
22641 }
22642
22643 switch (length) {
22644 case 1:
22645 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22646 case 2:
22647 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22648 case 3:
22649 return B(C3("===") || C3("<=>") || C3("[]="));
22650 default:
22651 return PM_STRING_QUERY_FALSE;
22652 }
22653
22654#undef B
22655#undef C1
22656#undef C2
22657#undef C3
22658}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:218
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:202
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:182
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:225
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:231
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:86
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:98
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:499
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:324
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:351
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:336
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:348
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:360
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:306
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:387
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:339
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:312
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:381
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:417
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:432
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:405
@ PM_CONTEXT_IF
an if statement
Definition parser.h:363
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:399
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:378
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:366
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:402
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:303
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:426
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:411
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:342
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:435
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:357
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:408
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:345
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:315
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:384
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:369
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:414
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:354
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:429
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:396
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:438
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:569
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:525
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:451
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:355
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:351
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:359
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:367
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22271
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22207
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22245
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22358
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:21946
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:265
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:273
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:267
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:270
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:102
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:109
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17348
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17350
const uint8_t * start
The start of the regular expression.
Definition prism.c:17353
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17364
const uint8_t * end
The end of the regular expression.
Definition prism.c:17356
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20339
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20350
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20341
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20347
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20344
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20357
AndNode.
Definition ast.h:1272
struct pm_node * left
AndNode::left.
Definition ast.h:1288
struct pm_node * right
AndNode::right.
Definition ast.h:1301
ArgumentsNode.
Definition ast.h:1333
pm_node_t base
The embedded base node.
Definition ast.h:1335
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1346
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1586
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1597
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1600
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1588
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1591
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1594
ArrayNode.
Definition ast.h:1364
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1374
ArrayPatternNode.
Definition ast.h:1425
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1444
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1484
pm_node_t base
The embedded base node.
Definition ast.h:1427
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1494
AssocNode.
Definition ast.h:1509
struct pm_node * value
AssocNode::value.
Definition ast.h:1541
struct pm_node * key
AssocNode::key.
Definition ast.h:1528
BeginNode.
Definition ast.h:1635
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1688
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1668
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1658
pm_node_t base
The embedded base node.
Definition ast.h:1637
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1678
This struct represents a set of binding powers used for a given token.
Definition prism.c:12163
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12171
pm_binding_power_t left
The left binding power.
Definition prism.c:12165
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12177
pm_binding_power_t right
The right binding power.
Definition prism.c:12168
BlockLocalVariableNode.
Definition ast.h:1754
BlockNode.
Definition ast.h:1782
BlockParameterNode.
Definition ast.h:1858
BlockParametersNode.
Definition ast.h:1912
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2139
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2200
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2220
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2158
pm_constant_id_t name
CallNode::name.
Definition ast.h:2181
pm_node_t base
The embedded base node.
Definition ast.h:2141
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2233
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2171
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2191
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2210
struct pm_node * block
CallNode::block.
Definition ast.h:2243
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2601
CaseNode.
Definition ast.h:2648
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2671
ClassVariableReadNode.
Definition ast.h:2943
ClassVariableTargetNode.
Definition ast.h:2972
ClassVariableWriteNode.
Definition ast.h:2995
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:461
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:469
pm_location_t location
The location of the comment in the source.
Definition parser.h:466
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3209
ConstantPathTargetNode.
Definition ast.h:3347
ConstantReadNode.
Definition ast.h:3442
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3471
ConstantWriteNode.
Definition ast.h:3494
This is a node in a linked list of contexts.
Definition parser.h:442
pm_context_t context
The context that this node represents.
Definition parser.h:444
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:447
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:366
ElseNode.
Definition ast.h:3673
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3686
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3771
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3784
FindPatternNode.
Definition ast.h:3831
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3844
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3896
pm_node_t base
The embedded base node.
Definition ast.h:3833
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3909
FlipFlopNode.
Definition ast.h:3927
FloatNode.
Definition ast.h:3960
double value
FloatNode::value.
Definition ast.h:3970
pm_node_t base
The embedded base node.
Definition ast.h:3962
ForwardingParameterNode.
Definition ast.h:4096
GlobalVariableReadNode.
Definition ast.h:4263
GlobalVariableTargetNode.
Definition ast.h:4292
GlobalVariableWriteNode.
Definition ast.h:4315
HashNode.
Definition ast.h:4377
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4403
HashPatternNode.
Definition ast.h:4437
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4492
pm_node_t base
The embedded base node.
Definition ast.h:4439
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4505
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4453
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4526
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4586
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4605
ImaginaryNode.
Definition ast.h:4632
InstanceVariableReadNode.
Definition ast.h:5122
InstanceVariableTargetNode.
Definition ast.h:5151
InstanceVariableWriteNode.
Definition ast.h:5174
IntegerNode.
Definition ast.h:5242
pm_integer_t value
IntegerNode::value.
Definition ast.h:5252
pm_node_t base
The embedded base node.
Definition ast.h:5244
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5280
InterpolatedRegularExpressionNode.
Definition ast.h:5326
InterpolatedStringNode.
Definition ast.h:5363
pm_node_t base
The embedded base node.
Definition ast.h:5365
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5371
InterpolatedSymbolNode.
Definition ast.h:5396
pm_node_t base
The embedded base node.
Definition ast.h:5398
InterpolatedXStringNode.
Definition ast.h:5429
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5437
pm_node_t base
The embedded base node.
Definition ast.h:5431
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5442
KeywordHashNode.
Definition ast.h:5501
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:521
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:515
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
union pm_lex_mode::@98 as
The data associated with this type of lex mode.
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
enum pm_lex_mode::@97 mode
The type of this lex mode.
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:535
pm_constant_id_t name
The name of the local variable.
Definition parser.h:537
pm_location_t location
The location of the local variable in the source.
Definition parser.h:540
uint32_t hash
The hash of the local variable.
Definition parser.h:549
uint32_t index
The index of the local variable in the local table.
Definition parser.h:543
uint32_t reads
The number of times the local variable is read.
Definition parser.h:546
LocalVariableReadNode.
Definition ast.h:5743
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5774
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5761
LocalVariableTargetNode.
Definition ast.h:5792
LocalVariableWriteNode.
Definition ast.h:5820
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5847
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5834
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:557
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:565
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:562
uint32_t size
The number of local variables in the set.
Definition parser.h:559
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:478
MatchLastLineNode.
Definition ast.h:5912
MatchWriteNode.
Definition ast.h:6070
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6083
MissingNode.
Definition ast.h:6095
MultiTargetNode.
Definition ast.h:6166
pm_node_t base
The embedded base node.
Definition ast.h:6168
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6224
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6184
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6234
MultiWriteNode.
Definition ast.h:6249
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1052
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1057
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1075
OptionalParameterNode.
Definition ast.h:6522
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:110
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:159
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:121
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:175
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:182
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:136
int32_t line
The line within the file that the parse starts on.
Definition options.h:130
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:115
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:168
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:192
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:141
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:124
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:156
OrNode.
Definition ast.h:6560
struct pm_node * left
OrNode::left.
Definition ast.h:6576
struct pm_node * right
OrNode::right.
Definition ast.h:6589
ParametersNode.
Definition ast.h:6615
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6633
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6653
pm_node_t base
The embedded base node.
Definition ast.h:6617
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6648
ParenthesesNode.
Definition ast.h:6671
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6679
This struct represents the overall parser.
Definition parser.h:643
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:843
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:652
uint8_t command_line
The command line flags given from the options.
Definition parser.h:862
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:758
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:885
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts the calc...
Definition parser.h:912
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:697
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:891
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:800
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:933
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:789
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:915
struct pm_parser::@103 lex_modes
A stack of lex modes.
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:710
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:752
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:724
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:661
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:777
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:859
pm_token_t previous
The previous token we were considering.
Definition parser.h:700
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:806
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:878
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:927
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:906
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:731
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:743
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:694
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:655
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:737
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:872
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in the correct cont...
Definition parser.h:856
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:771
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:687
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:734
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:718
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:667
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:765
int32_t start_line
The line number at the start of the parse.
Definition parser.h:812
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:899
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:684
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:721
size_t index
The current index into the lexer mode stack.
Definition parser.h:690
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:783
pm_scope_t * current_scope
The current local scope.
Definition parser.h:740
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:888
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:792
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:921
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:649
RangeNode.
Definition ast.h:6907
struct pm_node * right
RangeNode::right.
Definition ast.h:6937
struct pm_node * left
RangeNode::left.
Definition ast.h:6923
RationalNode.
Definition ast.h:6965
pm_node_t base
The embedded base node.
Definition ast.h:6967
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6977
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9487
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9492
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9489
RegularExpressionNode.
Definition ast.h:7032
pm_node_t base
The embedded base node.
Definition ast.h:7034
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:7055
RequiredParameterNode.
Definition ast.h:7106
RescueModifierNode.
Definition ast.h:7129
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7147
RescueNode.
Definition ast.h:7167
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7205
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7195
pm_node_t base
The embedded base node.
Definition ast.h:7169
This struct represents a node in a linked list of scopes.
Definition parser.h:583
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:585
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:596
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:623
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:588
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:617
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:629
SplatNode.
Definition ast.h:7467
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7480
StatementsNode.
Definition ast.h:7495
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7503
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7530
pm_node_t base
The embedded base node.
Definition ast.h:7532
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7553
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7548
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7538
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@104 type
The type of the string.
SymbolNode.
Definition ast.h:7626
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7639
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7649
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9461
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9466
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9472
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7682
UnlessNode.
Definition ast.h:7713
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7763
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7773
WhenNode.
Definition ast.h:7849
XStringNode.
Definition ast.h:7940