Ruby 3.5.0dev (2025-07-18 revision 148db9c80f11af1780f0f3685201f28de8f6b47a)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * The size assigned to a tab character (judging by the name, used when
 * measuring whitespace; the uses are outside this section).
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum/maximum of two values. Each argument can be evaluated more than
// once, so do not pass expressions that have side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Return the character that increases nesting for the given opening
 * delimiter. Only the four bracket-like openers ( [ { < have one (the opener
 * itself); every other byte yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Return the character that closes a literal opened with the given delimiter.
 * The bracket-like openers ( [ { < map to their matching closer; any other
 * byte terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
// Debug logging is off unless PM_DEBUG_LOGGING has already been defined, for
// example on the compiler command line.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323
#if PM_DEBUG_LOGGING
/**
 * Print the names of all bits set in the parser's lex state to stderr,
 * separated by "|" and followed by a newline. Prints "NONE" when the state
 * equals PM_LEX_STATE_NONE.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Empty before the first name is printed, "|" afterwards.
    const char *separator = "";

#define CHECK_STATE(state) \
    if (parser->lex_state & state) { \
        fprintf(stderr, "%s%s", separator, #state); \
        separator = "|"; \
    }

    CHECK_STATE(PM_LEX_STATE_BEG)
    CHECK_STATE(PM_LEX_STATE_END)
    CHECK_STATE(PM_LEX_STATE_ENDARG)
    CHECK_STATE(PM_LEX_STATE_ENDFN)
    CHECK_STATE(PM_LEX_STATE_ARG)
    CHECK_STATE(PM_LEX_STATE_CMDARG)
    CHECK_STATE(PM_LEX_STATE_MID)
    CHECK_STATE(PM_LEX_STATE_FNAME)
    CHECK_STATE(PM_LEX_STATE_DOT)
    CHECK_STATE(PM_LEX_STATE_CLASS)
    CHECK_STATE(PM_LEX_STATE_LABEL)
    CHECK_STATE(PM_LEX_STATE_LABELED)
    CHECK_STATE(PM_LEX_STATE_FITEM)

#undef CHECK_STATE

    fprintf(stderr, "\n");
}

/**
 * Set the lex state while logging to stderr the calling function and line as
 * well as the state before and after the change.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    // The macro below is not defined yet, so this calls the real function.
    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

// From here on, every lex_state_set call is routed through the logging
// wrapper together with the location of the call site.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/**
 * Evaluates to a non-zero value when the given option bit is set in the
 * parser's command_line field, and to zero otherwise.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Tests the PM_OPTIONS_COMMAND_LINE_A bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Tests the PM_OPTIONS_COMMAND_LINE_E bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Tests the PM_OPTIONS_COMMAND_LINE_L bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Tests the PM_OPTIONS_COMMAND_LINE_N bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Tests the PM_OPTIONS_COMMAND_LINE_P bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Tests the PM_OPTIONS_COMMAND_LINE_X bit of the parser's command line options. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append an error with the given diagnostic id, covering the source range
 * from start to end, to the parser's error list. The trailing arguments are
 * handed to pm_diagnostic_list_append_format as the format arguments; at
 * least one must be given.
 *
 * The parser argument is parenthesized so that any pointer expression (not
 * just a plain identifier) can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error to the parser's error list, using the start and
 * end of the given location (a pointer) as the source range. The trailing
 * arguments are the format arguments.
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error to the parser's error list, using the location of
 * the given node (a pointer) as the source range. The trailing arguments are
 * the format arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)

/**
 * Same as PM_PARSER_ERR_NODE_FORMAT, with the format arguments filled in from
 * the node itself: the length of its source range (as an int) followed by a
 * pointer to the start of its source.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error to the parser's error list, using the bounds of
 * the given token (a token value, not a pointer) as the source range. The
 * trailing arguments are the format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Same as PM_PARSER_ERR_TOKEN_FORMAT, with the format arguments filled in
 * from the token itself: the length of its source range (as an int) followed
 * by a pointer to the start of its source.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a warning with the given diagnostic id, covering the source range
 * from start to end, to the parser's warning list. The trailing arguments are
 * handed to pm_diagnostic_list_append_format as the format arguments; at
 * least one must be given.
 *
 * The parser argument is parenthesized so that any pointer expression (not
 * just a plain identifier) can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning to the parser's warning list, using the bounds
 * of the given token (a token value, not a pointer) as the source range.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Same as PM_PARSER_WARN_TOKEN_FORMAT, with the format arguments filled in
 * from the token itself: the length of its source range (as an int) followed
 * by a pointer to the start of its source.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning to the parser's warning list, using the location
 * of the given node (a pointer) as the source range.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * Local tables whose capacity is below this value are searched as a plain
 * list; at or above it they are treated as an open-addressed hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
1043 case PM_MATCH_REQUIRED_NODE:
1044 return void_node != NULL ? void_node : node;
1045 case PM_MATCH_PREDICATE_NODE:
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1151 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1153
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1185 case PM_BACK_REFERENCE_READ_NODE:
1186 case PM_CLASS_VARIABLE_READ_NODE:
1187 case PM_GLOBAL_VARIABLE_READ_NODE:
1188 case PM_INSTANCE_VARIABLE_READ_NODE:
1189 case PM_LOCAL_VARIABLE_READ_NODE:
1190 case PM_NUMBERED_REFERENCE_READ_NODE:
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1249 case PM_CONSTANT_PATH_NODE:
1250 type = "::";
1251 length = 2;
1252 break;
1253 case PM_CONSTANT_READ_NODE:
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1268 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1269 case PM_INTERPOLATED_STRING_NODE:
1270 case PM_RATIONAL_NODE:
1271 case PM_REGULAR_EXPRESSION_NODE:
1272 case PM_SOURCE_ENCODING_NODE:
1273 case PM_SOURCE_FILE_NODE:
1274 case PM_SOURCE_LINE_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1287 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which pm_conditional_predicate is looking at a node. It is
 * forwarded to pm_parser_warn_conditional_predicate_literal to pick the
 * wording of (or suppress) the literal-in-condition warnings.
 */
1332typedef enum {
// Warnings are worded with "condition". Passed for both operands of and/or nodes.
1333 PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
// Warnings are worded with "flip-flop". Passed for the endpoints of a range.
1334 PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
// pm_parser_warn_conditional_predicate_literal emits nothing for this type.
1335 PM_CONDITIONAL_PREDICATE_TYPE_NOT
1336} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1392 case PM_REGULAR_EXPRESSION_NODE:
1393 case PM_SOURCE_ENCODING_NODE:
1394 case PM_SOURCE_FILE_NODE:
1395 case PM_SOURCE_LINE_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1477 case PM_REGULAR_EXPRESSION_NODE:
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1482 node->type = PM_MATCH_LAST_LINE_NODE;
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1489 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1494 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1511 case PM_SOURCE_FILE_NODE:
1512 case PM_INTERPOLATED_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1516 case PM_INTERPOLATED_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1519 case PM_SOURCE_LINE_NODE:
1520 case PM_SOURCE_ENCODING_NODE:
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1526 case PM_CLASS_VARIABLE_WRITE_NODE:
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1529 case PM_CONSTANT_WRITE_NODE:
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1532 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1535 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1538 case PM_LOCAL_VARIABLE_WRITE_NODE:
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1541 case PM_MULTI_WRITE_NODE:
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// Each of the macros below expands to a pm_location_t compound literal.

// A zero-width location whose start and end are both the parser's start pointer.
1562#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
// The location spanned by the given token (pointer to a token).
1563#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
// A copy of the location of a node that exposes its location directly (e.g. a pm_node_t *).
1564#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// A copy of the location of a node that is reached through its `base` member.
1565#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// The "not provided" optional location: both pointers are NULL.
1566#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
// The token's location, or the "not provided" location when the token has type
// PM_TOKEN_NOT_PROVIDED. Note that the token argument is evaluated more than once.
1567#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1653 if (n <= 0) return 0;
1654
1655 if (parser->encoding_changed) {
1656 size_t width;
1657
1658 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1659 return width;
1660 } else if (*b == '_') {
1661 return 1;
1662 } else if (*b >= 0x80) {
1663 return parser->encoding->char_width(b, n);
1664 } else {
1665 return 0;
1666 }
1667 } else if (*b < 0x80) {
1668 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1669 } else {
1670 return pm_encoding_utf_8_char_width(b, n);
1671 }
1672}
1673
1678static inline size_t
1679char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1680 if (n <= 0) {
1681 return 0;
1682 } else if (*b < 0x80) {
1683 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1684 } else {
1685 return pm_encoding_utf_8_char_width(b, n);
1686 }
1687}
1688
1694static inline size_t
1695char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1696 if (n <= 0) {
1697 return 0;
1698 } else if (parser->encoding_changed) {
1699 size_t width;
1700
1701 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1702 return width;
1703 } else if (*b == '_') {
1704 return 1;
1705 } else if (*b >= 0x80) {
1706 return parser->encoding->char_width(b, n);
1707 } else {
1708 return 0;
1709 }
1710 } else {
1711 return char_is_identifier_utf8(b, n);
1712 }
1713}
1714
// The table below is a bitset over the characters 0x20 through 0x7e, split
// into 32-bit words, in which a bit is set for every punctuation character
// that is allowed in a global name. It is used to quickly check the next
// character after a $.
//
// BIT yields the mask for character c if c lives in word idx, and 0 otherwise.
// PUNCT ors together the masks of every allowed character for word idx.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx)  | BIT('*', idx) | BIT('$', idx)  | \
    BIT('?', idx)  | BIT('!', idx) | BIT('@', idx)  | \
    BIT('/', idx)  | BIT('\\', idx) | BIT(';', idx) | \
    BIT(',', idx)  | BIT('.', idx) | BIT('=', idx)  | \
    BIT(':', idx)  | BIT('<', idx) | BIT('>', idx)  | \
    BIT('\"', idx) | BIT('&', idx) | BIT('`', idx)  | \
    BIT('\'', idx) | BIT('+', idx) | BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters that is
 * allowed in a global name.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    // Only 0x21 through 0x7e are represented in the table.
    const bool in_table = (code > 0x20) && (code <= 0x7e);
    if (!in_table) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1) != 0;
}
1739
1740static inline bool
1741token_is_setter_name(pm_token_t *token) {
1742 return (
1743 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1744 ((token->type == PM_TOKEN_IDENTIFIER) &&
1745 (token->end - token->start >= 2) &&
1746 (token->end[-1] == '='))
1747 );
1748}
1749
/**
 * Return true if the first `length` bytes of `source` spell exactly one of
 * the keywords in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *text;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"),
        KEYWORD("return"), KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t position = 0; position < count; position++) {
        // Lengths are compared first so that memcmp never reads past the end
        // of either string.
        if (keywords[position].length != length) continue;
        if (memcmp(source, keywords[position].text, length) == 0) return true;
    }

    return false;
}
1820
1821/******************************************************************************/
1822/* Node flag handling functions */
1823/******************************************************************************/
1824
1828static inline void
1829pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1830 node->flags |= flag;
1831}
1832
1836static inline void
1837pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1838 node->flags &= (pm_node_flags_t) ~flag;
1839}
1840
1844static inline void
1845pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1846 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1847 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1848 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1849 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1850 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1851 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1852 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1853 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1854
1855 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1856}
1857
1858/******************************************************************************/
1859/* Node creation functions */
1860/******************************************************************************/
1861
1867#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1868
1872static inline pm_node_flags_t
1873pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1874 pm_node_flags_t flags = 0;
1875
1876 if (closing->type == PM_TOKEN_REGEXP_END) {
1877 pm_buffer_t unknown_flags = { 0 };
1878
1879 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1880 switch (*flag) {
1881 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1882 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1883 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1884 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1885
1886 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1887 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1888 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1889 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1890
1891 default: pm_buffer_append_byte(&unknown_flags, *flag);
1892 }
1893 }
1894
1895 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1896 if (unknown_flags_length != 0) {
1897 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1898 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1899 }
1900 pm_buffer_free(&unknown_flags);
1901 }
1902
1903 return flags;
1904}
1905
1906#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1907
// Forward declarations of the statements node helpers. Their definitions are
// not part of this section; presumably they appear further down in this file.

// Creates a statements node.
1908static pm_statements_node_t *
1909pm_statements_node_create(pm_parser_t *parser);
1910
// Appends a statement to the body of a statements node.
// NOTE(review): the meaning of `newline` is not visible here; confirm against
// the definition.
1911static void
1912pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1913
// Returns a size_t for the given statements node; judging by the name, the
// number of statements in its body.
1914static size_t
1915pm_statements_node_body_length(pm_statements_node_t *node);
1916
1921static inline void *
1922pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1923 void *memory = xcalloc(1, size);
1924 if (memory == NULL) {
1925 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1926 abort();
1927 }
1928 return memory;
1929}
1930
1931#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
1932#define PM_NODE_IDENTIFY(parser) (++parser->node_id)
1933
1937static pm_missing_node_t *
1938pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1939 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1940
1941 *node = (pm_missing_node_t) {{
1942 .type = PM_MISSING_NODE,
1943 .node_id = PM_NODE_IDENTIFY(parser),
1944 .location = { .start = start, .end = end }
1945 }};
1946
1947 return node;
1948}
1949
1953static pm_alias_global_variable_node_t *
1954pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1955 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1956 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1957
1958 *node = (pm_alias_global_variable_node_t) {
1959 {
1960 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1961 .node_id = PM_NODE_IDENTIFY(parser),
1962 .location = {
1963 .start = keyword->start,
1964 .end = old_name->location.end
1965 },
1966 },
1967 .new_name = new_name,
1968 .old_name = old_name,
1969 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1970 };
1971
1972 return node;
1973}
1974
1978static pm_alias_method_node_t *
1979pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1980 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1981 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1982
1983 *node = (pm_alias_method_node_t) {
1984 {
1985 .type = PM_ALIAS_METHOD_NODE,
1986 .node_id = PM_NODE_IDENTIFY(parser),
1987 .location = {
1988 .start = keyword->start,
1989 .end = old_name->location.end
1990 },
1991 },
1992 .new_name = new_name,
1993 .old_name = old_name,
1994 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1995 };
1996
1997 return node;
1998}
1999
2003static pm_alternation_pattern_node_t *
2004pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2005 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2006
2007 *node = (pm_alternation_pattern_node_t) {
2008 {
2009 .type = PM_ALTERNATION_PATTERN_NODE,
2010 .node_id = PM_NODE_IDENTIFY(parser),
2011 .location = {
2012 .start = left->location.start,
2013 .end = right->location.end
2014 },
2015 },
2016 .left = left,
2017 .right = right,
2018 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2019 };
2020
2021 return node;
2022}
2023
2027static pm_and_node_t *
2028pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2029 pm_assert_value_expression(parser, left);
2030
2031 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2032
2033 *node = (pm_and_node_t) {
2034 {
2035 .type = PM_AND_NODE,
2036 .node_id = PM_NODE_IDENTIFY(parser),
2037 .location = {
2038 .start = left->location.start,
2039 .end = right->location.end
2040 },
2041 },
2042 .left = left,
2043 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2044 .right = right
2045 };
2046
2047 return node;
2048}
2049
2053static pm_arguments_node_t *
2054pm_arguments_node_create(pm_parser_t *parser) {
2055 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2056
2057 *node = (pm_arguments_node_t) {
2058 {
2059 .type = PM_ARGUMENTS_NODE,
2060 .node_id = PM_NODE_IDENTIFY(parser),
2061 .location = PM_LOCATION_NULL_VALUE(parser)
2062 },
2063 .arguments = { 0 }
2064 };
2065
2066 return node;
2067}
2068
2072static size_t
2073pm_arguments_node_size(pm_arguments_node_t *node) {
2074 return node->arguments.size;
2075}
2076
2080static void
2081pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2082 if (pm_arguments_node_size(node) == 0) {
2083 node->base.location.start = argument->location.start;
2084 }
2085
2086 node->base.location.end = argument->location.end;
2087 pm_node_list_append(&node->arguments, argument);
2088
2089 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2090 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2091 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2092 } else {
2093 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2094 }
2095 }
2096}
2097
2101static pm_array_node_t *
2102pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2103 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2104
2105 *node = (pm_array_node_t) {
2106 {
2107 .type = PM_ARRAY_NODE,
2108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2109 .node_id = PM_NODE_IDENTIFY(parser),
2110 .location = PM_LOCATION_TOKEN_VALUE(opening)
2111 },
2112 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2113 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2114 .elements = { 0 }
2115 };
2116
2117 return node;
2118}
2119
2123static inline void
2124pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2125 if (!node->elements.size && !node->opening_loc.start) {
2126 node->base.location.start = element->location.start;
2127 }
2128
2129 pm_node_list_append(&node->elements, element);
2130 node->base.location.end = element->location.end;
2131
2132 // If the element is not a static literal, then the array is not a static
2133 // literal. Turn that flag off.
2134 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2135 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2136 }
2137
2138 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2139 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2140 }
2141}
2142
2146static void
2147pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2148 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2149 node->base.location.end = closing->end;
2150 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2151}
2152
2157static pm_array_pattern_node_t *
2158pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2159 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2160
2161 *node = (pm_array_pattern_node_t) {
2162 {
2163 .type = PM_ARRAY_PATTERN_NODE,
2164 .node_id = PM_NODE_IDENTIFY(parser),
2165 .location = {
2166 .start = nodes->nodes[0]->location.start,
2167 .end = nodes->nodes[nodes->size - 1]->location.end
2168 },
2169 },
2170 .constant = NULL,
2171 .rest = NULL,
2172 .requireds = { 0 },
2173 .posts = { 0 },
2174 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2175 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2176 };
2177
2178 // For now we're going to just copy over each pointer manually. This could be
2179 // much more efficient, as we could instead resize the node list.
2180 bool found_rest = false;
2181 pm_node_t *child;
2182
2183 PM_NODE_LIST_FOREACH(nodes, index, child) {
2184 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2185 node->rest = child;
2186 found_rest = true;
2187 } else if (found_rest) {
2188 pm_node_list_append(&node->posts, child);
2189 } else {
2190 pm_node_list_append(&node->requireds, child);
2191 }
2192 }
2193
2194 return node;
2195}
2196
2200static pm_array_pattern_node_t *
2201pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2202 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2203
2204 *node = (pm_array_pattern_node_t) {
2205 {
2206 .type = PM_ARRAY_PATTERN_NODE,
2207 .node_id = PM_NODE_IDENTIFY(parser),
2208 .location = rest->location,
2209 },
2210 .constant = NULL,
2211 .rest = rest,
2212 .requireds = { 0 },
2213 .posts = { 0 },
2214 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2215 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2216 };
2217
2218 return node;
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 {
2231 .type = PM_ARRAY_PATTERN_NODE,
2232 .node_id = PM_NODE_IDENTIFY(parser),
2233 .location = {
2234 .start = constant->location.start,
2235 .end = closing->end
2236 },
2237 },
2238 .constant = constant,
2239 .rest = NULL,
2240 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2241 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2242 .requireds = { 0 },
2243 .posts = { 0 }
2244 };
2245
2246 return node;
2247}
2248
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2255 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2256
2257 *node = (pm_array_pattern_node_t) {
2258 {
2259 .type = PM_ARRAY_PATTERN_NODE,
2260 .node_id = PM_NODE_IDENTIFY(parser),
2261 .location = {
2262 .start = opening->start,
2263 .end = closing->end
2264 },
2265 },
2266 .constant = NULL,
2267 .rest = NULL,
2268 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2269 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2270 .requireds = { 0 },
2271 .posts = { 0 }
2272 };
2273
2274 return node;
2275}
2276
2277static inline void
2278pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2279 pm_node_list_append(&node->requireds, inner);
2280}
2281
2285static pm_assoc_node_t *
2286pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2287 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2288 const uint8_t *end;
2289
2290 if (value != NULL && value->location.end > key->location.end) {
2291 end = value->location.end;
2292 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2293 end = operator->end;
2294 } else {
2295 end = key->location.end;
2296 }
2297
2298 // Hash string keys will be frozen, so we can mark them as frozen here so
2299 // that the compiler picks them up and also when we check for static literal
2300 // on the keys it gets factored in.
2301 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2302 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2303 }
2304
2305 // If the key and value of this assoc node are both static literals, then
2306 // we can mark this node as a static literal.
2307 pm_node_flags_t flags = 0;
2308 if (
2309 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2310 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2311 ) {
2312 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2313 }
2314
2315 *node = (pm_assoc_node_t) {
2316 {
2317 .type = PM_ASSOC_NODE,
2318 .flags = flags,
2319 .node_id = PM_NODE_IDENTIFY(parser),
2320 .location = {
2321 .start = key->location.start,
2322 .end = end
2323 },
2324 },
2325 .key = key,
2326 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2327 .value = value
2328 };
2329
2330 return node;
2331}
2332
2336static pm_assoc_splat_node_t *
2337pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2338 assert(operator->type == PM_TOKEN_USTAR_STAR);
2339 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2340
2341 *node = (pm_assoc_splat_node_t) {
2342 {
2343 .type = PM_ASSOC_SPLAT_NODE,
2344 .node_id = PM_NODE_IDENTIFY(parser),
2345 .location = {
2346 .start = operator->start,
2347 .end = value == NULL ? operator->end : value->location.end
2348 },
2349 },
2350 .value = value,
2351 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2352 };
2353
2354 return node;
2355}
2356
2360static pm_back_reference_read_node_t *
2361pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2362 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2363 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2364
2365 *node = (pm_back_reference_read_node_t) {
2366 {
2367 .type = PM_BACK_REFERENCE_READ_NODE,
2368 .node_id = PM_NODE_IDENTIFY(parser),
2369 .location = PM_LOCATION_TOKEN_VALUE(name),
2370 },
2371 .name = pm_parser_constant_id_token(parser, name)
2372 };
2373
2374 return node;
2375}
2376
2380static pm_begin_node_t *
2381pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2382 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2383
2384 *node = (pm_begin_node_t) {
2385 {
2386 .type = PM_BEGIN_NODE,
2387 .node_id = PM_NODE_IDENTIFY(parser),
2388 .location = {
2389 .start = begin_keyword->start,
2390 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2391 },
2392 },
2393 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2394 .statements = statements,
2395 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2396 };
2397
2398 return node;
2399}
2400
2404static void
2405pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2406 // If the begin keyword doesn't exist, we set the start on the begin_node
2407 if (!node->begin_keyword_loc.start) {
2408 node->base.location.start = rescue_clause->base.location.start;
2409 }
2410 node->base.location.end = rescue_clause->base.location.end;
2411 node->rescue_clause = rescue_clause;
2412}
2413
2417static void
2418pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2419 node->base.location.end = else_clause->base.location.end;
2420 node->else_clause = else_clause;
2421}
2422
2426static void
2427pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2428 node->base.location.end = ensure_clause->base.location.end;
2429 node->ensure_clause = ensure_clause;
2430}
2431
2435static void
2436pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2437 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2438
2439 node->base.location.end = end_keyword->end;
2440 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2441}
2442
2446static pm_block_argument_node_t *
2447pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2448 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2449
2450 *node = (pm_block_argument_node_t) {
2451 {
2452 .type = PM_BLOCK_ARGUMENT_NODE,
2453 .node_id = PM_NODE_IDENTIFY(parser),
2454 .location = {
2455 .start = operator->start,
2456 .end = expression == NULL ? operator->end : expression->location.end
2457 },
2458 },
2459 .expression = expression,
2460 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2461 };
2462
2463 return node;
2464}
2465
2469static pm_block_node_t *
2470pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2471 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2472
2473 *node = (pm_block_node_t) {
2474 {
2475 .type = PM_BLOCK_NODE,
2476 .node_id = PM_NODE_IDENTIFY(parser),
2477 .location = { .start = opening->start, .end = closing->end },
2478 },
2479 .locals = *locals,
2480 .parameters = parameters,
2481 .body = body,
2482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2484 };
2485
2486 return node;
2487}
2488
2492static pm_block_parameter_node_t *
2493pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2494 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2495 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2496
2497 *node = (pm_block_parameter_node_t) {
2498 {
2499 .type = PM_BLOCK_PARAMETER_NODE,
2500 .node_id = PM_NODE_IDENTIFY(parser),
2501 .location = {
2502 .start = operator->start,
2503 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2504 },
2505 },
2506 .name = pm_parser_optional_constant_id_token(parser, name),
2507 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2508 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2509 };
2510
2511 return node;
2512}
2513
2517static pm_block_parameters_node_t *
2518pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2519 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2520
2521 const uint8_t *start;
2522 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2523 start = opening->start;
2524 } else if (parameters != NULL) {
2525 start = parameters->base.location.start;
2526 } else {
2527 start = NULL;
2528 }
2529
2530 const uint8_t *end;
2531 if (parameters != NULL) {
2532 end = parameters->base.location.end;
2533 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2534 end = opening->end;
2535 } else {
2536 end = NULL;
2537 }
2538
2539 *node = (pm_block_parameters_node_t) {
2540 {
2541 .type = PM_BLOCK_PARAMETERS_NODE,
2542 .node_id = PM_NODE_IDENTIFY(parser),
2543 .location = {
2544 .start = start,
2545 .end = end
2546 }
2547 },
2548 .parameters = parameters,
2549 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2550 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2563
2564 node->base.location.end = closing->end;
2565 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2566}
2567
2571static pm_block_local_variable_node_t *
2572pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2573 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2574
2575 *node = (pm_block_local_variable_node_t) {
2576 {
2577 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2578 .node_id = PM_NODE_IDENTIFY(parser),
2579 .location = PM_LOCATION_TOKEN_VALUE(name),
2580 },
2581 .name = pm_parser_constant_id_token(parser, name)
2582 };
2583
2584 return node;
2585}
2586
2590static void
2591pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2592 pm_node_list_append(&node->locals, (pm_node_t *) local);
2593
2594 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2595 node->base.location.end = local->base.location.end;
2596}
2597
2601static pm_break_node_t *
2602pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2603 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2604 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2605
2606 *node = (pm_break_node_t) {
2607 {
2608 .type = PM_BREAK_NODE,
2609 .node_id = PM_NODE_IDENTIFY(parser),
2610 .location = {
2611 .start = keyword->start,
2612 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2613 },
2614 },
2615 .arguments = arguments,
2616 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2617 };
2618
2619 return node;
2620}
2621
2622// There are certain flags that we want to use internally but don't want to
2623// expose because they are not relevant beyond parsing. Therefore we'll define
2624// them here and not define them in config.yml/a header file.
2625static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2626static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2627static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2628static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2629
2635static pm_call_node_t *
2636pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2637 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2638
2639 *node = (pm_call_node_t) {
2640 {
2641 .type = PM_CALL_NODE,
2642 .flags = flags,
2643 .node_id = PM_NODE_IDENTIFY(parser),
2644 .location = PM_LOCATION_NULL_VALUE(parser),
2645 },
2646 .receiver = NULL,
2647 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2648 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2649 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2650 .arguments = NULL,
2651 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2652 .block = NULL,
2653 .name = 0
2654 };
2655
2656 return node;
2657}
2658
2663static inline pm_node_flags_t
2664pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2665 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2666}
2667
2672static pm_call_node_t *
2673pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2674 pm_assert_value_expression(parser, receiver);
2675
2676 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2677 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2678 flags |= PM_CALL_NODE_FLAGS_INDEX;
2679 }
2680
2681 pm_call_node_t *node = pm_call_node_create(parser, flags);
2682
2683 node->base.location.start = receiver->location.start;
2684 node->base.location.end = pm_arguments_end(arguments);
2685
2686 node->receiver = receiver;
2687 node->message_loc.start = arguments->opening_loc.start;
2688 node->message_loc.end = arguments->closing_loc.end;
2689
2690 node->opening_loc = arguments->opening_loc;
2691 node->arguments = arguments->arguments;
2692 node->closing_loc = arguments->closing_loc;
2693 node->block = arguments->block;
2694
2695 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2696 return node;
2697}
2698
2702static pm_call_node_t *
2703pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2704 pm_assert_value_expression(parser, receiver);
2705 pm_assert_value_expression(parser, argument);
2706
2707 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2708
2709 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2710 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2711
2712 node->receiver = receiver;
2713 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2714
2715 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2716 pm_arguments_node_arguments_append(arguments, argument);
2717 node->arguments = arguments;
2718
2719 node->name = pm_parser_constant_id_token(parser, operator);
2720 return node;
2721}
2722
2726static pm_call_node_t *
2727pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2728 pm_assert_value_expression(parser, receiver);
2729
2730 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2731
2732 node->base.location.start = receiver->location.start;
2733 const uint8_t *end = pm_arguments_end(arguments);
2734 if (end == NULL) {
2735 end = message->end;
2736 }
2737 node->base.location.end = end;
2738
2739 node->receiver = receiver;
2740 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2741 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2742 node->opening_loc = arguments->opening_loc;
2743 node->arguments = arguments->arguments;
2744 node->closing_loc = arguments->closing_loc;
2745 node->block = arguments->block;
2746
2747 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2748 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2749 }
2750
2751 node->name = pm_parser_constant_id_token(parser, message);
2752 return node;
2753}
2754
2758static pm_call_node_t *
2759pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2760 pm_call_node_t *node = pm_call_node_create(parser, 0);
2761 node->base.location.start = parser->start;
2762 node->base.location.end = parser->end;
2763
2764 node->receiver = receiver;
2765 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2766 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2767 node->arguments = arguments;
2768
2769 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2770 return node;
2771}
2772
2777static pm_call_node_t *
2778pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2779 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2780
2781 node->base.location.start = message->start;
2782 node->base.location.end = pm_arguments_end(arguments);
2783
2784 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2785 node->opening_loc = arguments->opening_loc;
2786 node->arguments = arguments->arguments;
2787 node->closing_loc = arguments->closing_loc;
2788 node->block = arguments->block;
2789
2790 node->name = pm_parser_constant_id_token(parser, message);
2791 return node;
2792}
2793
2798static pm_call_node_t *
2799pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2800 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2801
2802 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2803 node->arguments = arguments;
2804
2805 node->name = name;
2806 return node;
2807}
2808
2812static pm_call_node_t *
2813pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2814 pm_assert_value_expression(parser, receiver);
2815 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2816
2817 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2818
2819 node->base.location.start = message->start;
2820 if (arguments->closing_loc.start != NULL) {
2821 node->base.location.end = arguments->closing_loc.end;
2822 } else {
2823 assert(receiver != NULL);
2824 node->base.location.end = receiver->location.end;
2825 }
2826
2827 node->receiver = receiver;
2828 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2829 node->opening_loc = arguments->opening_loc;
2830 node->arguments = arguments->arguments;
2831 node->closing_loc = arguments->closing_loc;
2832
2833 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2834 return node;
2835}
2836
2840static pm_call_node_t *
2841pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2842 pm_assert_value_expression(parser, receiver);
2843
2844 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2845
2846 node->base.location.start = receiver->location.start;
2847 node->base.location.end = pm_arguments_end(arguments);
2848
2849 node->receiver = receiver;
2850 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2851 node->opening_loc = arguments->opening_loc;
2852 node->arguments = arguments->arguments;
2853 node->closing_loc = arguments->closing_loc;
2854 node->block = arguments->block;
2855
2856 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2857 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2858 }
2859
2860 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2861 return node;
2862}
2863
2867static pm_call_node_t *
2868pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2869 pm_assert_value_expression(parser, receiver);
2870
2871 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2872
2873 node->base.location.start = operator->start;
2874 node->base.location.end = receiver->location.end;
2875
2876 node->receiver = receiver;
2877 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2878
2879 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2880 return node;
2881}
2882
2887static pm_call_node_t *
2888pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2889 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2890
2891 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2892 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2893
2894 node->name = pm_parser_constant_id_token(parser, message);
2895 return node;
2896}
2897
2902static inline bool
2903pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2904 return (
2905 (node->message_loc.start != NULL) &&
2906 (node->message_loc.end[-1] != '!') &&
2907 (node->message_loc.end[-1] != '?') &&
2908 char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
2909 (node->opening_loc.start == NULL) &&
2910 (node->arguments == NULL) &&
2911 (node->block == NULL)
2912 );
2913}
2914
2918static void
2919pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2920 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2921
2922 if (write_constant->length > 0) {
2923 size_t length = write_constant->length - 1;
2924
2925 void *memory = xmalloc(length);
2926 memcpy(memory, write_constant->start, length);
2927
2928 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2929 } else {
2930 // We can get here if the message was missing because of a syntax error.
2931 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2932 }
2933}
2934
2938static pm_call_and_write_node_t *
2939pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2940 assert(target->block == NULL);
2941 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2942 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2943
2944 *node = (pm_call_and_write_node_t) {
2945 {
2946 .type = PM_CALL_AND_WRITE_NODE,
2947 .flags = target->base.flags,
2948 .node_id = PM_NODE_IDENTIFY(parser),
2949 .location = {
2950 .start = target->base.location.start,
2951 .end = value->location.end
2952 }
2953 },
2954 .receiver = target->receiver,
2955 .call_operator_loc = target->call_operator_loc,
2956 .message_loc = target->message_loc,
2957 .read_name = 0,
2958 .write_name = target->name,
2959 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2960 .value = value
2961 };
2962
2963 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2964
2965 // Here we're going to free the target, since it is no longer necessary.
2966 // However, we don't want to call `pm_node_destroy` because we want to keep
2967 // around all of its children since we just reused them.
2968 xfree(target);
2969
2970 return node;
2971}
2972
2977static void
2978pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2979 if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2980 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2981 pm_node_t *node;
2982 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2983 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2984 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2985 break;
2986 }
2987 }
2988 }
2989
2990 if (block != NULL) {
2991 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2992 }
2993 }
2994}
2995
2999static pm_index_and_write_node_t *
3000pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3001 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3002 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
3003
3004 pm_index_arguments_check(parser, target->arguments, target->block);
3005
3006 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3007 *node = (pm_index_and_write_node_t) {
3008 {
3009 .type = PM_INDEX_AND_WRITE_NODE,
3010 .flags = target->base.flags,
3011 .node_id = PM_NODE_IDENTIFY(parser),
3012 .location = {
3013 .start = target->base.location.start,
3014 .end = value->location.end
3015 }
3016 },
3017 .receiver = target->receiver,
3018 .call_operator_loc = target->call_operator_loc,
3019 .opening_loc = target->opening_loc,
3020 .arguments = target->arguments,
3021 .closing_loc = target->closing_loc,
3022 .block = (pm_block_argument_node_t *) target->block,
3023 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3024 .value = value
3025 };
3026
3027 // Here we're going to free the target, since it is no longer necessary.
3028 // However, we don't want to call `pm_node_destroy` because we want to keep
3029 // around all of its children since we just reused them.
3030 xfree(target);
3031
3032 return node;
3033}
3034
3038static pm_call_operator_write_node_t *
3039pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3040 assert(target->block == NULL);
3041 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3042
3043 *node = (pm_call_operator_write_node_t) {
3044 {
3045 .type = PM_CALL_OPERATOR_WRITE_NODE,
3046 .flags = target->base.flags,
3047 .node_id = PM_NODE_IDENTIFY(parser),
3048 .location = {
3049 .start = target->base.location.start,
3050 .end = value->location.end
3051 }
3052 },
3053 .receiver = target->receiver,
3054 .call_operator_loc = target->call_operator_loc,
3055 .message_loc = target->message_loc,
3056 .read_name = 0,
3057 .write_name = target->name,
3058 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3059 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3060 .value = value
3061 };
3062
3063 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3064
3065 // Here we're going to free the target, since it is no longer necessary.
3066 // However, we don't want to call `pm_node_destroy` because we want to keep
3067 // around all of its children since we just reused them.
3068 xfree(target);
3069
3070 return node;
3071}
3072
3076static pm_index_operator_write_node_t *
3077pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3078 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3079
3080 pm_index_arguments_check(parser, target->arguments, target->block);
3081
3082 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3083 *node = (pm_index_operator_write_node_t) {
3084 {
3085 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3086 .flags = target->base.flags,
3087 .node_id = PM_NODE_IDENTIFY(parser),
3088 .location = {
3089 .start = target->base.location.start,
3090 .end = value->location.end
3091 }
3092 },
3093 .receiver = target->receiver,
3094 .call_operator_loc = target->call_operator_loc,
3095 .opening_loc = target->opening_loc,
3096 .arguments = target->arguments,
3097 .closing_loc = target->closing_loc,
3098 .block = (pm_block_argument_node_t *) target->block,
3099 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3100 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3101 .value = value
3102 };
3103
3104 // Here we're going to free the target, since it is no longer necessary.
3105 // However, we don't want to call `pm_node_destroy` because we want to keep
3106 // around all of its children since we just reused them.
3107 xfree(target);
3108
3109 return node;
3110}
3111
3115static pm_call_or_write_node_t *
3116pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3117 assert(target->block == NULL);
3118 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3119 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3120
3121 *node = (pm_call_or_write_node_t) {
3122 {
3123 .type = PM_CALL_OR_WRITE_NODE,
3124 .flags = target->base.flags,
3125 .node_id = PM_NODE_IDENTIFY(parser),
3126 .location = {
3127 .start = target->base.location.start,
3128 .end = value->location.end
3129 }
3130 },
3131 .receiver = target->receiver,
3132 .call_operator_loc = target->call_operator_loc,
3133 .message_loc = target->message_loc,
3134 .read_name = 0,
3135 .write_name = target->name,
3136 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3137 .value = value
3138 };
3139
3140 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3141
3142 // Here we're going to free the target, since it is no longer necessary.
3143 // However, we don't want to call `pm_node_destroy` because we want to keep
3144 // around all of its children since we just reused them.
3145 xfree(target);
3146
3147 return node;
3148}
3149
3153static pm_index_or_write_node_t *
3154pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3155 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3156 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3157
3158 pm_index_arguments_check(parser, target->arguments, target->block);
3159
3160 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3161 *node = (pm_index_or_write_node_t) {
3162 {
3163 .type = PM_INDEX_OR_WRITE_NODE,
3164 .flags = target->base.flags,
3165 .node_id = PM_NODE_IDENTIFY(parser),
3166 .location = {
3167 .start = target->base.location.start,
3168 .end = value->location.end
3169 }
3170 },
3171 .receiver = target->receiver,
3172 .call_operator_loc = target->call_operator_loc,
3173 .opening_loc = target->opening_loc,
3174 .arguments = target->arguments,
3175 .closing_loc = target->closing_loc,
3176 .block = (pm_block_argument_node_t *) target->block,
3177 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3178 .value = value
3179 };
3180
3181 // Here we're going to free the target, since it is no longer necessary.
3182 // However, we don't want to call `pm_node_destroy` because we want to keep
3183 // around all of its children since we just reused them.
3184 xfree(target);
3185
3186 return node;
3187}
3188
3193static pm_call_target_node_t *
3194pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3195 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3196
3197 *node = (pm_call_target_node_t) {
3198 {
3199 .type = PM_CALL_TARGET_NODE,
3200 .flags = target->base.flags,
3201 .node_id = PM_NODE_IDENTIFY(parser),
3202 .location = target->base.location
3203 },
3204 .receiver = target->receiver,
3205 .call_operator_loc = target->call_operator_loc,
3206 .name = target->name,
3207 .message_loc = target->message_loc
3208 };
3209
3210 // Here we're going to free the target, since it is no longer necessary.
3211 // However, we don't want to call `pm_node_destroy` because we want to keep
3212 // around all of its children since we just reused them.
3213 xfree(target);
3214
3215 return node;
3216}
3217
3222static pm_index_target_node_t *
3223pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3224 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3225 pm_node_flags_t flags = target->base.flags;
3226
3227 pm_index_arguments_check(parser, target->arguments, target->block);
3228
3229 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3230 *node = (pm_index_target_node_t) {
3231 {
3232 .type = PM_INDEX_TARGET_NODE,
3233 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3234 .node_id = PM_NODE_IDENTIFY(parser),
3235 .location = target->base.location
3236 },
3237 .receiver = target->receiver,
3238 .opening_loc = target->opening_loc,
3239 .arguments = target->arguments,
3240 .closing_loc = target->closing_loc,
3241 .block = (pm_block_argument_node_t *) target->block,
3242 };
3243
3244 // Here we're going to free the target, since it is no longer necessary.
3245 // However, we don't want to call `pm_node_destroy` because we want to keep
3246 // around all of its children since we just reused them.
3247 xfree(target);
3248
3249 return node;
3250}
3251
3255static pm_capture_pattern_node_t *
3256pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3257 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3258
3259 *node = (pm_capture_pattern_node_t) {
3260 {
3261 .type = PM_CAPTURE_PATTERN_NODE,
3262 .node_id = PM_NODE_IDENTIFY(parser),
3263 .location = {
3264 .start = value->location.start,
3265 .end = target->base.location.end
3266 },
3267 },
3268 .value = value,
3269 .target = target,
3270 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3271 };
3272
3273 return node;
3274}
3275
3279static pm_case_node_t *
3280pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3281 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3282
3283 *node = (pm_case_node_t) {
3284 {
3285 .type = PM_CASE_NODE,
3286 .node_id = PM_NODE_IDENTIFY(parser),
3287 .location = {
3288 .start = case_keyword->start,
3289 .end = end_keyword->end
3290 },
3291 },
3292 .predicate = predicate,
3293 .else_clause = NULL,
3294 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3295 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3296 .conditions = { 0 }
3297 };
3298
3299 return node;
3300}
3301
3305static void
3306pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3307 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3308
3309 pm_node_list_append(&node->conditions, condition);
3310 node->base.location.end = condition->location.end;
3311}
3312
3316static void
3317pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3318 node->else_clause = else_clause;
3319 node->base.location.end = else_clause->base.location.end;
3320}
3321
3325static void
3326pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3327 node->base.location.end = end_keyword->end;
3328 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3329}
3330
3334static pm_case_match_node_t *
3335pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3336 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3337
3338 *node = (pm_case_match_node_t) {
3339 {
3340 .type = PM_CASE_MATCH_NODE,
3341 .node_id = PM_NODE_IDENTIFY(parser),
3342 .location = {
3343 .start = case_keyword->start,
3344 .end = end_keyword->end
3345 },
3346 },
3347 .predicate = predicate,
3348 .else_clause = NULL,
3349 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3350 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3351 .conditions = { 0 }
3352 };
3353
3354 return node;
3355}
3356
3360static void
3361pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3362 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3363
3364 pm_node_list_append(&node->conditions, condition);
3365 node->base.location.end = condition->location.end;
3366}
3367
3371static void
3372pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3373 node->else_clause = else_clause;
3374 node->base.location.end = else_clause->base.location.end;
3375}
3376
3380static void
3381pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3382 node->base.location.end = end_keyword->end;
3383 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3384}
3385
3389static pm_class_node_t *
3390pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3391 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3392
3393 *node = (pm_class_node_t) {
3394 {
3395 .type = PM_CLASS_NODE,
3396 .node_id = PM_NODE_IDENTIFY(parser),
3397 .location = { .start = class_keyword->start, .end = end_keyword->end },
3398 },
3399 .locals = *locals,
3400 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3401 .constant_path = constant_path,
3402 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3403 .superclass = superclass,
3404 .body = body,
3405 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3406 .name = pm_parser_constant_id_token(parser, name)
3407 };
3408
3409 return node;
3410}
3411
3415static pm_class_variable_and_write_node_t *
3416pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3417 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3418 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3419
3420 *node = (pm_class_variable_and_write_node_t) {
3421 {
3422 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3423 .node_id = PM_NODE_IDENTIFY(parser),
3424 .location = {
3425 .start = target->base.location.start,
3426 .end = value->location.end
3427 }
3428 },
3429 .name = target->name,
3430 .name_loc = target->base.location,
3431 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3432 .value = value
3433 };
3434
3435 return node;
3436}
3437
3441static pm_class_variable_operator_write_node_t *
3442pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3443 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3444
3445 *node = (pm_class_variable_operator_write_node_t) {
3446 {
3447 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3448 .node_id = PM_NODE_IDENTIFY(parser),
3449 .location = {
3450 .start = target->base.location.start,
3451 .end = value->location.end
3452 }
3453 },
3454 .name = target->name,
3455 .name_loc = target->base.location,
3456 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3457 .value = value,
3458 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3459 };
3460
3461 return node;
3462}
3463
3467static pm_class_variable_or_write_node_t *
3468pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3469 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3470 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3471
3472 *node = (pm_class_variable_or_write_node_t) {
3473 {
3474 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3475 .node_id = PM_NODE_IDENTIFY(parser),
3476 .location = {
3477 .start = target->base.location.start,
3478 .end = value->location.end
3479 }
3480 },
3481 .name = target->name,
3482 .name_loc = target->base.location,
3483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3484 .value = value
3485 };
3486
3487 return node;
3488}
3489
3493static pm_class_variable_read_node_t *
3494pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3495 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3496 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3497
3498 *node = (pm_class_variable_read_node_t) {
3499 {
3500 .type = PM_CLASS_VARIABLE_READ_NODE,
3501 .node_id = PM_NODE_IDENTIFY(parser),
3502 .location = PM_LOCATION_TOKEN_VALUE(token)
3503 },
3504 .name = pm_parser_constant_id_token(parser, token)
3505 };
3506
3507 return node;
3508}
3509
3516static inline pm_node_flags_t
3517pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3518 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3519 return flags;
3520 }
3521 return 0;
3522}
3523
3527static pm_class_variable_write_node_t *
3528pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3529 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3530
3531 *node = (pm_class_variable_write_node_t) {
3532 {
3533 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3534 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3535 .node_id = PM_NODE_IDENTIFY(parser),
3536 .location = {
3537 .start = read_node->base.location.start,
3538 .end = value->location.end
3539 },
3540 },
3541 .name = read_node->name,
3542 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3543 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3544 .value = value
3545 };
3546
3547 return node;
3548}
3549
3553static pm_constant_path_and_write_node_t *
3554pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3555 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3556 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3557
3558 *node = (pm_constant_path_and_write_node_t) {
3559 {
3560 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3561 .node_id = PM_NODE_IDENTIFY(parser),
3562 .location = {
3563 .start = target->base.location.start,
3564 .end = value->location.end
3565 }
3566 },
3567 .target = target,
3568 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3569 .value = value
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_path_operator_write_node_t *
3579pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3581
3582 *node = (pm_constant_path_operator_write_node_t) {
3583 {
3584 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3585 .node_id = PM_NODE_IDENTIFY(parser),
3586 .location = {
3587 .start = target->base.location.start,
3588 .end = value->location.end
3589 }
3590 },
3591 .target = target,
3592 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3593 .value = value,
3594 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3595 };
3596
3597 return node;
3598}
3599
3603static pm_constant_path_or_write_node_t *
3604pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3605 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3606 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3607
3608 *node = (pm_constant_path_or_write_node_t) {
3609 {
3610 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3611 .node_id = PM_NODE_IDENTIFY(parser),
3612 .location = {
3613 .start = target->base.location.start,
3614 .end = value->location.end
3615 }
3616 },
3617 .target = target,
3618 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3619 .value = value
3620 };
3621
3622 return node;
3623}
3624
3628static pm_constant_path_node_t *
3629pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3630 pm_assert_value_expression(parser, parent);
3631 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3632
3633 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3634 if (name_token->type == PM_TOKEN_CONSTANT) {
3635 name = pm_parser_constant_id_token(parser, name_token);
3636 }
3637
3638 *node = (pm_constant_path_node_t) {
3639 {
3640 .type = PM_CONSTANT_PATH_NODE,
3641 .node_id = PM_NODE_IDENTIFY(parser),
3642 .location = {
3643 .start = parent == NULL ? delimiter->start : parent->location.start,
3644 .end = name_token->end
3645 },
3646 },
3647 .parent = parent,
3648 .name = name,
3649 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3650 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3651 };
3652
3653 return node;
3654}
3655
3659static pm_constant_path_write_node_t *
3660pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3661 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3662
3663 *node = (pm_constant_path_write_node_t) {
3664 {
3665 .type = PM_CONSTANT_PATH_WRITE_NODE,
3666 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3667 .node_id = PM_NODE_IDENTIFY(parser),
3668 .location = {
3669 .start = target->base.location.start,
3670 .end = value->location.end
3671 },
3672 },
3673 .target = target,
3674 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3675 .value = value
3676 };
3677
3678 return node;
3679}
3680
3684static pm_constant_and_write_node_t *
3685pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3686 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3687 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3688
3689 *node = (pm_constant_and_write_node_t) {
3690 {
3691 .type = PM_CONSTANT_AND_WRITE_NODE,
3692 .node_id = PM_NODE_IDENTIFY(parser),
3693 .location = {
3694 .start = target->base.location.start,
3695 .end = value->location.end
3696 }
3697 },
3698 .name = target->name,
3699 .name_loc = target->base.location,
3700 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3701 .value = value
3702 };
3703
3704 return node;
3705}
3706
3710static pm_constant_operator_write_node_t *
3711pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3712 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3713
3714 *node = (pm_constant_operator_write_node_t) {
3715 {
3716 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3717 .node_id = PM_NODE_IDENTIFY(parser),
3718 .location = {
3719 .start = target->base.location.start,
3720 .end = value->location.end
3721 }
3722 },
3723 .name = target->name,
3724 .name_loc = target->base.location,
3725 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3726 .value = value,
3727 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3728 };
3729
3730 return node;
3731}
3732
3736static pm_constant_or_write_node_t *
3737pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3738 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3739 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3740
3741 *node = (pm_constant_or_write_node_t) {
3742 {
3743 .type = PM_CONSTANT_OR_WRITE_NODE,
3744 .node_id = PM_NODE_IDENTIFY(parser),
3745 .location = {
3746 .start = target->base.location.start,
3747 .end = value->location.end
3748 }
3749 },
3750 .name = target->name,
3751 .name_loc = target->base.location,
3752 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3753 .value = value
3754 };
3755
3756 return node;
3757}
3758
3762static pm_constant_read_node_t *
3763pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3764 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3765 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3766
3767 *node = (pm_constant_read_node_t) {
3768 {
3769 .type = PM_CONSTANT_READ_NODE,
3770 .node_id = PM_NODE_IDENTIFY(parser),
3771 .location = PM_LOCATION_TOKEN_VALUE(name)
3772 },
3773 .name = pm_parser_constant_id_token(parser, name)
3774 };
3775
3776 return node;
3777}
3778
3782static pm_constant_write_node_t *
3783pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3784 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3785
3786 *node = (pm_constant_write_node_t) {
3787 {
3788 .type = PM_CONSTANT_WRITE_NODE,
3789 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3790 .node_id = PM_NODE_IDENTIFY(parser),
3791 .location = {
3792 .start = target->base.location.start,
3793 .end = value->location.end
3794 }
3795 },
3796 .name = target->name,
3797 .name_loc = target->base.location,
3798 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3799 .value = value
3800 };
3801
3802 return node;
3803}
3804
3808static void
3809pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3810 switch (PM_NODE_TYPE(node)) {
3811 case PM_BEGIN_NODE: {
3812 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3813 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3814 break;
3815 }
3816 case PM_PARENTHESES_NODE: {
3817 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3818 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3819 break;
3820 }
3821 case PM_STATEMENTS_NODE: {
3822 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3823 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3824 break;
3825 }
3826 case PM_ARRAY_NODE:
3827 case PM_FLOAT_NODE:
3828 case PM_IMAGINARY_NODE:
3829 case PM_INTEGER_NODE:
3830 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3831 case PM_INTERPOLATED_STRING_NODE:
3832 case PM_INTERPOLATED_SYMBOL_NODE:
3833 case PM_INTERPOLATED_X_STRING_NODE:
3834 case PM_RATIONAL_NODE:
3835 case PM_REGULAR_EXPRESSION_NODE:
3836 case PM_SOURCE_ENCODING_NODE:
3837 case PM_SOURCE_FILE_NODE:
3838 case PM_SOURCE_LINE_NODE:
3839 case PM_STRING_NODE:
3840 case PM_SYMBOL_NODE:
3841 case PM_X_STRING_NODE:
3842 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3843 break;
3844 default:
3845 break;
3846 }
3847}
3848
3852static pm_def_node_t *
3853pm_def_node_create(
3854 pm_parser_t *parser,
3855 pm_constant_id_t name,
3856 const pm_token_t *name_loc,
3857 pm_node_t *receiver,
3858 pm_parameters_node_t *parameters,
3859 pm_node_t *body,
3860 pm_constant_id_list_t *locals,
3861 const pm_token_t *def_keyword,
3862 const pm_token_t *operator,
3863 const pm_token_t *lparen,
3864 const pm_token_t *rparen,
3865 const pm_token_t *equal,
3866 const pm_token_t *end_keyword
3867) {
3868 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3869 const uint8_t *end;
3870
3871 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3872 end = body->location.end;
3873 } else {
3874 end = end_keyword->end;
3875 }
3876
3877 if (receiver != NULL) {
3878 pm_def_node_receiver_check(parser, receiver);
3879 }
3880
3881 *node = (pm_def_node_t) {
3882 {
3883 .type = PM_DEF_NODE,
3884 .node_id = PM_NODE_IDENTIFY(parser),
3885 .location = { .start = def_keyword->start, .end = end },
3886 },
3887 .name = name,
3888 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3889 .receiver = receiver,
3890 .parameters = parameters,
3891 .body = body,
3892 .locals = *locals,
3893 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3894 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3895 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3896 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3897 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3898 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3899 };
3900
3901 return node;
3902}
3903
3907static pm_defined_node_t *
3908pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3909 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3910
3911 *node = (pm_defined_node_t) {
3912 {
3913 .type = PM_DEFINED_NODE,
3914 .node_id = PM_NODE_IDENTIFY(parser),
3915 .location = {
3916 .start = keyword_loc->start,
3917 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3918 },
3919 },
3920 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3921 .value = value,
3922 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3923 .keyword_loc = *keyword_loc
3924 };
3925
3926 return node;
3927}
3928
3932static pm_else_node_t *
3933pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3934 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3935 const uint8_t *end = NULL;
3936 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3937 end = statements->base.location.end;
3938 } else {
3939 end = end_keyword->end;
3940 }
3941
3942 *node = (pm_else_node_t) {
3943 {
3944 .type = PM_ELSE_NODE,
3945 .node_id = PM_NODE_IDENTIFY(parser),
3946 .location = {
3947 .start = else_keyword->start,
3948 .end = end,
3949 },
3950 },
3951 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3952 .statements = statements,
3953 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3954 };
3955
3956 return node;
3957}
3958
3962static pm_embedded_statements_node_t *
3963pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3964 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3965
3966 *node = (pm_embedded_statements_node_t) {
3967 {
3968 .type = PM_EMBEDDED_STATEMENTS_NODE,
3969 .node_id = PM_NODE_IDENTIFY(parser),
3970 .location = {
3971 .start = opening->start,
3972 .end = closing->end
3973 }
3974 },
3975 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3976 .statements = statements,
3977 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3978 };
3979
3980 return node;
3981}
3982
3986static pm_embedded_variable_node_t *
3987pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3988 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3989
3990 *node = (pm_embedded_variable_node_t) {
3991 {
3992 .type = PM_EMBEDDED_VARIABLE_NODE,
3993 .node_id = PM_NODE_IDENTIFY(parser),
3994 .location = {
3995 .start = operator->start,
3996 .end = variable->location.end
3997 }
3998 },
3999 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4000 .variable = variable
4001 };
4002
4003 return node;
4004}
4005
4009static pm_ensure_node_t *
4010pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4011 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4012
4013 *node = (pm_ensure_node_t) {
4014 {
4015 .type = PM_ENSURE_NODE,
4016 .node_id = PM_NODE_IDENTIFY(parser),
4017 .location = {
4018 .start = ensure_keyword->start,
4019 .end = end_keyword->end
4020 },
4021 },
4022 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4023 .statements = statements,
4024 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4025 };
4026
4027 return node;
4028}
4029
4033static pm_false_node_t *
4034pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4035 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4036 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4037
4038 *node = (pm_false_node_t) {{
4039 .type = PM_FALSE_NODE,
4040 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4041 .node_id = PM_NODE_IDENTIFY(parser),
4042 .location = PM_LOCATION_TOKEN_VALUE(token)
4043 }};
4044
4045 return node;
4046}
4047
4052static pm_find_pattern_node_t *
4053pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4054 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4055
4056 pm_node_t *left = nodes->nodes[0];
4057 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4058 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4059
4060 pm_node_t *right;
4061
4062 if (nodes->size == 1) {
4063 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4064 } else {
4065 right = nodes->nodes[nodes->size - 1];
4066 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4067 }
4068
4069#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4070 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4071 // The resulting AST will anyway be ignored, but this file still needs to compile.
4072 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4073#else
4074 pm_node_t *right_splat_node = right;
4075#endif
4076 *node = (pm_find_pattern_node_t) {
4077 {
4078 .type = PM_FIND_PATTERN_NODE,
4079 .node_id = PM_NODE_IDENTIFY(parser),
4080 .location = {
4081 .start = left->location.start,
4082 .end = right->location.end,
4083 },
4084 },
4085 .constant = NULL,
4086 .left = left_splat_node,
4087 .right = right_splat_node,
4088 .requireds = { 0 },
4089 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4090 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4091 };
4092
4093 // For now we're going to just copy over each pointer manually. This could be
4094 // much more efficient, as we could instead resize the node list to only point
4095 // to 1...-1.
4096 for (size_t index = 1; index < nodes->size - 1; index++) {
4097 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4098 }
4099
4100 return node;
4101}
4102
4107static double
4108pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4109 ptrdiff_t diff = token->end - token->start;
4110 if (diff <= 0) return 0.0;
4111
4112 // First, get a buffer of the content.
4113 size_t length = (size_t) diff;
4114 char *buffer = xmalloc(sizeof(char) * (length + 1));
4115 memcpy((void *) buffer, token->start, length);
4116
4117 // Next, determine if we need to replace the decimal point because of
4118 // locale-specific options, and then normalize them if we have to.
4119 char decimal_point = *localeconv()->decimal_point;
4120 if (decimal_point != '.') {
4121 for (size_t index = 0; index < length; index++) {
4122 if (buffer[index] == '.') buffer[index] = decimal_point;
4123 }
4124 }
4125
4126 // Next, handle underscores by removing them from the buffer.
4127 for (size_t index = 0; index < length; index++) {
4128 if (buffer[index] == '_') {
4129 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4130 length--;
4131 }
4132 }
4133
4134 // Null-terminate the buffer so that strtod cannot read off the end.
4135 buffer[length] = '\0';
4136
4137 // Now, call strtod to parse the value. Note that CRuby has their own
4138 // version of strtod which avoids locales. We're okay using the locale-aware
4139 // version because we've already validated through the parser that the token
4140 // is in a valid format.
4141 errno = 0;
4142 char *eptr;
4143 double value = strtod(buffer, &eptr);
4144
4145 // This should never happen, because we've already checked that the token
4146 // is in a valid format. However it's good to be safe.
4147 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4148 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4149 xfree((void *) buffer);
4150 return 0.0;
4151 }
4152
4153 // If errno is set, then it should only be ERANGE. At this point we need to
4154 // check if it's infinity (it should be).
4155 if (errno == ERANGE && PRISM_ISINF(value)) {
4156 int warn_width;
4157 const char *ellipsis;
4158
4159 if (length > 20) {
4160 warn_width = 20;
4161 ellipsis = "...";
4162 } else {
4163 warn_width = (int) length;
4164 ellipsis = "";
4165 }
4166
4167 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4168 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4169 }
4170
4171 // Finally we can free the buffer and return the value.
4172 xfree((void *) buffer);
4173 return value;
4174}
4175
4179static pm_float_node_t *
4180pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4181 assert(token->type == PM_TOKEN_FLOAT);
4182 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4183
4184 *node = (pm_float_node_t) {
4185 {
4186 .type = PM_FLOAT_NODE,
4187 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4188 .node_id = PM_NODE_IDENTIFY(parser),
4189 .location = PM_LOCATION_TOKEN_VALUE(token)
4190 },
4191 .value = pm_double_parse(parser, token)
4192 };
4193
4194 return node;
4195}
4196
4200static pm_imaginary_node_t *
4201pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4202 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4203
4204 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4205 *node = (pm_imaginary_node_t) {
4206 {
4207 .type = PM_IMAGINARY_NODE,
4208 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4209 .node_id = PM_NODE_IDENTIFY(parser),
4210 .location = PM_LOCATION_TOKEN_VALUE(token)
4211 },
4212 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4213 .type = PM_TOKEN_FLOAT,
4214 .start = token->start,
4215 .end = token->end - 1
4216 }))
4217 };
4218
4219 return node;
4220}
4221
4225static pm_rational_node_t *
4226pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4227 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4228
4229 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4230 *node = (pm_rational_node_t) {
4231 {
4232 .type = PM_RATIONAL_NODE,
4233 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4234 .node_id = PM_NODE_IDENTIFY(parser),
4235 .location = PM_LOCATION_TOKEN_VALUE(token)
4236 },
4237 .numerator = { 0 },
4238 .denominator = { 0 }
4239 };
4240
4241 const uint8_t *start = token->start;
4242 const uint8_t *end = token->end - 1; // r
4243
4244 while (start < end && *start == '0') start++; // 0.1 -> .1
4245 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4246
4247 size_t length = (size_t) (end - start);
4248 if (length == 1) {
4249 node->denominator.value = 1;
4250 return node;
4251 }
4252
4253 const uint8_t *point = memchr(start, '.', length);
4254 assert(point && "should have a decimal point");
4255
4256 uint8_t *digits = xmalloc(length);
4257 if (digits == NULL) {
4258 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4259 abort();
4260 }
4261
4262 memcpy(digits, start, (unsigned long) (point - start));
4263 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4264 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4265
4266 digits[0] = '1';
4267 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4268 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4269 xfree(digits);
4270
4271 pm_integers_reduce(&node->numerator, &node->denominator);
4272 return node;
4273}
4274
4279static pm_imaginary_node_t *
4280pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4281 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4282
4283 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4284 *node = (pm_imaginary_node_t) {
4285 {
4286 .type = PM_IMAGINARY_NODE,
4287 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4288 .node_id = PM_NODE_IDENTIFY(parser),
4289 .location = PM_LOCATION_TOKEN_VALUE(token)
4290 },
4291 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4292 .type = PM_TOKEN_FLOAT_RATIONAL,
4293 .start = token->start,
4294 .end = token->end - 1
4295 }))
4296 };
4297
4298 return node;
4299}
4300
4304static pm_for_node_t *
4305pm_for_node_create(
4306 pm_parser_t *parser,
4307 pm_node_t *index,
4308 pm_node_t *collection,
4309 pm_statements_node_t *statements,
4310 const pm_token_t *for_keyword,
4311 const pm_token_t *in_keyword,
4312 const pm_token_t *do_keyword,
4313 const pm_token_t *end_keyword
4314) {
4315 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4316
4317 *node = (pm_for_node_t) {
4318 {
4319 .type = PM_FOR_NODE,
4320 .node_id = PM_NODE_IDENTIFY(parser),
4321 .location = {
4322 .start = for_keyword->start,
4323 .end = end_keyword->end
4324 },
4325 },
4326 .index = index,
4327 .collection = collection,
4328 .statements = statements,
4329 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4330 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4331 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4332 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4333 };
4334
4335 return node;
4336}
4337
4341static pm_forwarding_arguments_node_t *
4342pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4343 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4344 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4345
4346 *node = (pm_forwarding_arguments_node_t) {{
4347 .type = PM_FORWARDING_ARGUMENTS_NODE,
4348 .node_id = PM_NODE_IDENTIFY(parser),
4349 .location = PM_LOCATION_TOKEN_VALUE(token)
4350 }};
4351
4352 return node;
4353}
4354
4358static pm_forwarding_parameter_node_t *
4359pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4360 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4361 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4362
4363 *node = (pm_forwarding_parameter_node_t) {{
4364 .type = PM_FORWARDING_PARAMETER_NODE,
4365 .node_id = PM_NODE_IDENTIFY(parser),
4366 .location = PM_LOCATION_TOKEN_VALUE(token)
4367 }};
4368
4369 return node;
4370}
4371
4375static pm_forwarding_super_node_t *
4376pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4377 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4378 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4379 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4380
4381 pm_block_node_t *block = NULL;
4382 if (arguments->block != NULL) {
4383 block = (pm_block_node_t *) arguments->block;
4384 }
4385
4386 *node = (pm_forwarding_super_node_t) {
4387 {
4388 .type = PM_FORWARDING_SUPER_NODE,
4389 .node_id = PM_NODE_IDENTIFY(parser),
4390 .location = {
4391 .start = token->start,
4392 .end = block != NULL ? block->base.location.end : token->end
4393 },
4394 },
4395 .block = block
4396 };
4397
4398 return node;
4399}
4400
4405static pm_hash_pattern_node_t *
4406pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4407 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4408
4409 *node = (pm_hash_pattern_node_t) {
4410 {
4411 .type = PM_HASH_PATTERN_NODE,
4412 .node_id = PM_NODE_IDENTIFY(parser),
4413 .location = {
4414 .start = opening->start,
4415 .end = closing->end
4416 },
4417 },
4418 .constant = NULL,
4419 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4420 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4421 .elements = { 0 },
4422 .rest = NULL
4423 };
4424
4425 return node;
4426}
4427
4431static pm_hash_pattern_node_t *
4432pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4433 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4434
4435 const uint8_t *start;
4436 const uint8_t *end;
4437
4438 if (elements->size > 0) {
4439 if (rest) {
4440 start = elements->nodes[0]->location.start;
4441 end = rest->location.end;
4442 } else {
4443 start = elements->nodes[0]->location.start;
4444 end = elements->nodes[elements->size - 1]->location.end;
4445 }
4446 } else {
4447 assert(rest != NULL);
4448 start = rest->location.start;
4449 end = rest->location.end;
4450 }
4451
4452 *node = (pm_hash_pattern_node_t) {
4453 {
4454 .type = PM_HASH_PATTERN_NODE,
4455 .node_id = PM_NODE_IDENTIFY(parser),
4456 .location = {
4457 .start = start,
4458 .end = end
4459 },
4460 },
4461 .constant = NULL,
4462 .elements = { 0 },
4463 .rest = rest,
4464 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4465 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4466 };
4467
4468 pm_node_t *element;
4469 PM_NODE_LIST_FOREACH(elements, index, element) {
4470 pm_node_list_append(&node->elements, element);
4471 }
4472
4473 return node;
4474}
4475
4479static pm_constant_id_t
4480pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4481 switch (PM_NODE_TYPE(target)) {
4482 case PM_GLOBAL_VARIABLE_READ_NODE:
4483 return ((pm_global_variable_read_node_t *) target)->name;
4484 case PM_BACK_REFERENCE_READ_NODE:
4485 return ((pm_back_reference_read_node_t *) target)->name;
4486 case PM_NUMBERED_REFERENCE_READ_NODE:
4487 // This will only ever happen in the event of a syntax error, but we
4488 // still need to provide something for the node.
4489 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4490 default:
4491 assert(false && "unreachable");
4492 return (pm_constant_id_t) -1;
4493 }
4494}
4495
4499static pm_global_variable_and_write_node_t *
4500pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4501 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4502 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4503
4504 *node = (pm_global_variable_and_write_node_t) {
4505 {
4506 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4507 .node_id = PM_NODE_IDENTIFY(parser),
4508 .location = {
4509 .start = target->location.start,
4510 .end = value->location.end
4511 }
4512 },
4513 .name = pm_global_variable_write_name(parser, target),
4514 .name_loc = target->location,
4515 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4516 .value = value
4517 };
4518
4519 return node;
4520}
4521
4525static pm_global_variable_operator_write_node_t *
4526pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4527 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4528
4529 *node = (pm_global_variable_operator_write_node_t) {
4530 {
4531 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4532 .node_id = PM_NODE_IDENTIFY(parser),
4533 .location = {
4534 .start = target->location.start,
4535 .end = value->location.end
4536 }
4537 },
4538 .name = pm_global_variable_write_name(parser, target),
4539 .name_loc = target->location,
4540 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4541 .value = value,
4542 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4543 };
4544
4545 return node;
4546}
4547
4551static pm_global_variable_or_write_node_t *
4552pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4554 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4555
4556 *node = (pm_global_variable_or_write_node_t) {
4557 {
4558 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4559 .node_id = PM_NODE_IDENTIFY(parser),
4560 .location = {
4561 .start = target->location.start,
4562 .end = value->location.end
4563 }
4564 },
4565 .name = pm_global_variable_write_name(parser, target),
4566 .name_loc = target->location,
4567 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4568 .value = value
4569 };
4570
4571 return node;
4572}
4573
4577static pm_global_variable_read_node_t *
4578pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4579 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4580
4581 *node = (pm_global_variable_read_node_t) {
4582 {
4583 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4584 .node_id = PM_NODE_IDENTIFY(parser),
4585 .location = PM_LOCATION_TOKEN_VALUE(name),
4586 },
4587 .name = pm_parser_constant_id_token(parser, name)
4588 };
4589
4590 return node;
4591}
4592
4596static pm_global_variable_read_node_t *
4597pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4598 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4599
4600 *node = (pm_global_variable_read_node_t) {
4601 {
4602 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4603 .node_id = PM_NODE_IDENTIFY(parser),
4604 .location = PM_LOCATION_NULL_VALUE(parser)
4605 },
4606 .name = name
4607 };
4608
4609 return node;
4610}
4611
4615static pm_global_variable_write_node_t *
4616pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4617 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4618
4619 *node = (pm_global_variable_write_node_t) {
4620 {
4621 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4622 .node_id = PM_NODE_IDENTIFY(parser),
4623 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4624 .location = {
4625 .start = target->location.start,
4626 .end = value->location.end
4627 },
4628 },
4629 .name = pm_global_variable_write_name(parser, target),
4630 .name_loc = PM_LOCATION_NODE_VALUE(target),
4631 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4632 .value = value
4633 };
4634
4635 return node;
4636}
4637
4641static pm_global_variable_write_node_t *
4642pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4643 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4644
4645 *node = (pm_global_variable_write_node_t) {
4646 {
4647 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4648 .node_id = PM_NODE_IDENTIFY(parser),
4649 .location = PM_LOCATION_NULL_VALUE(parser)
4650 },
4651 .name = name,
4652 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4653 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4654 .value = value
4655 };
4656
4657 return node;
4658}
4659
4663static pm_hash_node_t *
4664pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4665 assert(opening != NULL);
4666 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4667
4668 *node = (pm_hash_node_t) {
4669 {
4670 .type = PM_HASH_NODE,
4671 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4672 .node_id = PM_NODE_IDENTIFY(parser),
4673 .location = PM_LOCATION_TOKEN_VALUE(opening)
4674 },
4675 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4676 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4677 .elements = { 0 }
4678 };
4679
4680 return node;
4681}
4682
4686static inline void
4687pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4688 pm_node_list_append(&hash->elements, element);
4689
4690 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4691 if (static_literal) {
4692 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4693 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4694 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4695 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4696 }
4697
4698 if (!static_literal) {
4699 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4700 }
4701}
4702
4703static inline void
4704pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4705 hash->base.location.end = token->end;
4706 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4707}
4708
4712static pm_if_node_t *
4713pm_if_node_create(pm_parser_t *parser,
4714 const pm_token_t *if_keyword,
4715 pm_node_t *predicate,
4716 const pm_token_t *then_keyword,
4717 pm_statements_node_t *statements,
4718 pm_node_t *subsequent,
4719 const pm_token_t *end_keyword
4720) {
4721 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4722 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4723
4724 const uint8_t *end;
4725 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4726 end = end_keyword->end;
4727 } else if (subsequent != NULL) {
4728 end = subsequent->location.end;
4729 } else if (pm_statements_node_body_length(statements) != 0) {
4730 end = statements->base.location.end;
4731 } else {
4732 end = predicate->location.end;
4733 }
4734
4735 *node = (pm_if_node_t) {
4736 {
4737 .type = PM_IF_NODE,
4738 .flags = PM_NODE_FLAG_NEWLINE,
4739 .node_id = PM_NODE_IDENTIFY(parser),
4740 .location = {
4741 .start = if_keyword->start,
4742 .end = end
4743 },
4744 },
4745 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4746 .predicate = predicate,
4747 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4748 .statements = statements,
4749 .subsequent = subsequent,
4750 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4751 };
4752
4753 return node;
4754}
4755
4759static pm_if_node_t *
4760pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4761 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4762 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4763
4764 pm_statements_node_t *statements = pm_statements_node_create(parser);
4765 pm_statements_node_body_append(parser, statements, statement, true);
4766
4767 *node = (pm_if_node_t) {
4768 {
4769 .type = PM_IF_NODE,
4770 .flags = PM_NODE_FLAG_NEWLINE,
4771 .node_id = PM_NODE_IDENTIFY(parser),
4772 .location = {
4773 .start = statement->location.start,
4774 .end = predicate->location.end
4775 },
4776 },
4777 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4778 .predicate = predicate,
4779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4780 .statements = statements,
4781 .subsequent = NULL,
4782 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4783 };
4784
4785 return node;
4786}
4787
4791static pm_if_node_t *
4792pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4793 pm_assert_value_expression(parser, predicate);
4794 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4795
4796 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4797 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4798
4799 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4800 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4801
4802 pm_token_t end_keyword = not_provided(parser);
4803 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4804
4805 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4806
4807 *node = (pm_if_node_t) {
4808 {
4809 .type = PM_IF_NODE,
4810 .flags = PM_NODE_FLAG_NEWLINE,
4811 .node_id = PM_NODE_IDENTIFY(parser),
4812 .location = {
4813 .start = predicate->location.start,
4814 .end = false_expression->location.end,
4815 },
4816 },
4817 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4818 .predicate = predicate,
4819 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4820 .statements = if_statements,
4821 .subsequent = (pm_node_t *) else_node,
4822 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4823 };
4824
4825 return node;
4826
4827}
4828
4829static inline void
4830pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4831 node->base.location.end = keyword->end;
4832 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4833}
4834
4835static inline void
4836pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4837 node->base.location.end = keyword->end;
4838 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4839}
4840
4844static pm_implicit_node_t *
4845pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4846 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4847
4848 *node = (pm_implicit_node_t) {
4849 {
4850 .type = PM_IMPLICIT_NODE,
4851 .node_id = PM_NODE_IDENTIFY(parser),
4852 .location = value->location
4853 },
4854 .value = value
4855 };
4856
4857 return node;
4858}
4859
4863static pm_implicit_rest_node_t *
4864pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4865 assert(token->type == PM_TOKEN_COMMA);
4866
4867 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4868
4869 *node = (pm_implicit_rest_node_t) {
4870 {
4871 .type = PM_IMPLICIT_REST_NODE,
4872 .node_id = PM_NODE_IDENTIFY(parser),
4873 .location = PM_LOCATION_TOKEN_VALUE(token)
4874 }
4875 };
4876
4877 return node;
4878}
4879
4883static pm_integer_node_t *
4884pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4885 assert(token->type == PM_TOKEN_INTEGER);
4886 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4887
4888 *node = (pm_integer_node_t) {
4889 {
4890 .type = PM_INTEGER_NODE,
4891 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4892 .node_id = PM_NODE_IDENTIFY(parser),
4893 .location = PM_LOCATION_TOKEN_VALUE(token)
4894 },
4895 .value = { 0 }
4896 };
4897
4898 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4899 switch (base) {
4900 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4901 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4902 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4903 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4904 default: assert(false && "unreachable"); break;
4905 }
4906
4907 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4908 return node;
4909}
4910
4915static pm_imaginary_node_t *
4916pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4917 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4918
4919 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4920 *node = (pm_imaginary_node_t) {
4921 {
4922 .type = PM_IMAGINARY_NODE,
4923 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4924 .node_id = PM_NODE_IDENTIFY(parser),
4925 .location = PM_LOCATION_TOKEN_VALUE(token)
4926 },
4927 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4928 .type = PM_TOKEN_INTEGER,
4929 .start = token->start,
4930 .end = token->end - 1
4931 }))
4932 };
4933
4934 return node;
4935}
4936
4941static pm_rational_node_t *
4942pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4943 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4944
4945 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4946 *node = (pm_rational_node_t) {
4947 {
4948 .type = PM_RATIONAL_NODE,
4949 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4950 .node_id = PM_NODE_IDENTIFY(parser),
4951 .location = PM_LOCATION_TOKEN_VALUE(token)
4952 },
4953 .numerator = { 0 },
4954 .denominator = { .value = 1, 0 }
4955 };
4956
4957 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4958 switch (base) {
4959 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4960 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4961 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4962 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4963 default: assert(false && "unreachable"); break;
4964 }
4965
4966 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4967
4968 return node;
4969}
4970
4975static pm_imaginary_node_t *
4976pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4977 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4978
4979 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4980 *node = (pm_imaginary_node_t) {
4981 {
4982 .type = PM_IMAGINARY_NODE,
4983 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4984 .node_id = PM_NODE_IDENTIFY(parser),
4985 .location = PM_LOCATION_TOKEN_VALUE(token)
4986 },
4987 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4988 .type = PM_TOKEN_INTEGER_RATIONAL,
4989 .start = token->start,
4990 .end = token->end - 1
4991 }))
4992 };
4993
4994 return node;
4995}
4996
5000static pm_in_node_t *
5001pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
5002 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5003
5004 const uint8_t *end;
5005 if (statements != NULL) {
5006 end = statements->base.location.end;
5007 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5008 end = then_keyword->end;
5009 } else {
5010 end = pattern->location.end;
5011 }
5012
5013 *node = (pm_in_node_t) {
5014 {
5015 .type = PM_IN_NODE,
5016 .node_id = PM_NODE_IDENTIFY(parser),
5017 .location = {
5018 .start = in_keyword->start,
5019 .end = end
5020 },
5021 },
5022 .pattern = pattern,
5023 .statements = statements,
5024 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5025 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5026 };
5027
5028 return node;
5029}
5030
5034static pm_instance_variable_and_write_node_t *
5035pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5036 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5037 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5038
5039 *node = (pm_instance_variable_and_write_node_t) {
5040 {
5041 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5042 .node_id = PM_NODE_IDENTIFY(parser),
5043 .location = {
5044 .start = target->base.location.start,
5045 .end = value->location.end
5046 }
5047 },
5048 .name = target->name,
5049 .name_loc = target->base.location,
5050 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5051 .value = value
5052 };
5053
5054 return node;
5055}
5056
5060static pm_instance_variable_operator_write_node_t *
5061pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5062 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5063
5064 *node = (pm_instance_variable_operator_write_node_t) {
5065 {
5066 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5067 .node_id = PM_NODE_IDENTIFY(parser),
5068 .location = {
5069 .start = target->base.location.start,
5070 .end = value->location.end
5071 }
5072 },
5073 .name = target->name,
5074 .name_loc = target->base.location,
5075 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5076 .value = value,
5077 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5078 };
5079
5080 return node;
5081}
5082
5086static pm_instance_variable_or_write_node_t *
5087pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5088 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5089 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5090
5091 *node = (pm_instance_variable_or_write_node_t) {
5092 {
5093 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5094 .node_id = PM_NODE_IDENTIFY(parser),
5095 .location = {
5096 .start = target->base.location.start,
5097 .end = value->location.end
5098 }
5099 },
5100 .name = target->name,
5101 .name_loc = target->base.location,
5102 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5103 .value = value
5104 };
5105
5106 return node;
5107}
5108
5112static pm_instance_variable_read_node_t *
5113pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5114 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5115 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5116
5117 *node = (pm_instance_variable_read_node_t) {
5118 {
5119 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5120 .node_id = PM_NODE_IDENTIFY(parser),
5121 .location = PM_LOCATION_TOKEN_VALUE(token)
5122 },
5123 .name = pm_parser_constant_id_token(parser, token)
5124 };
5125
5126 return node;
5127}
5128
5133static pm_instance_variable_write_node_t *
5134pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5135 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5136 *node = (pm_instance_variable_write_node_t) {
5137 {
5138 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5139 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5140 .node_id = PM_NODE_IDENTIFY(parser),
5141 .location = {
5142 .start = read_node->base.location.start,
5143 .end = value->location.end
5144 }
5145 },
5146 .name = read_node->name,
5147 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5148 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5149 .value = value
5150 };
5151
5152 return node;
5153}
5154
5160static void
5161pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5162 switch (PM_NODE_TYPE(part)) {
5163 case PM_STRING_NODE:
5164 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5165 break;
5166 case PM_EMBEDDED_STATEMENTS_NODE: {
5167 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5168 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5169
5170 if (embedded == NULL) {
5171 // If there are no statements or more than one statement, then
5172 // we lose the static literal flag.
5173 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5175 // If the embedded statement is a string, then we can keep the
5176 // static literal flag and mark the string as frozen.
5177 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5178 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5179 // If the embedded statement is an interpolated string and it's
5180 // a static literal, then we can keep the static literal flag.
5181 } else {
5182 // Otherwise we lose the static literal flag.
5183 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5184 }
5185
5186 break;
5187 }
5188 case PM_EMBEDDED_VARIABLE_NODE:
5189 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5190 break;
5191 default:
5192 assert(false && "unexpected node type");
5193 break;
5194 }
5195
5196 pm_node_list_append(parts, part);
5197}
5198
5202static pm_interpolated_regular_expression_node_t *
5203pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5204 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5205
5206 *node = (pm_interpolated_regular_expression_node_t) {
5207 {
5208 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5209 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5210 .node_id = PM_NODE_IDENTIFY(parser),
5211 .location = {
5212 .start = opening->start,
5213 .end = NULL,
5214 },
5215 },
5216 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5217 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5218 .parts = { 0 }
5219 };
5220
5221 return node;
5222}
5223
5224static inline void
5225pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5226 if (node->base.location.start > part->location.start) {
5227 node->base.location.start = part->location.start;
5228 }
5229 if (node->base.location.end < part->location.end) {
5230 node->base.location.end = part->location.end;
5231 }
5232
5233 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5234}
5235
5236static inline void
5237pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5238 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5239 node->base.location.end = closing->end;
5240 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5241}
5242
5266static inline void
5267pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5268#define CLEAR_FLAGS(node) \
5269 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5270
5271#define MUTABLE_FLAGS(node) \
5272 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5273
5274 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5275 node->base.location.start = part->location.start;
5276 }
5277
5278 node->base.location.end = MAX(node->base.location.end, part->location.end);
5279
5280 switch (PM_NODE_TYPE(part)) {
5281 case PM_STRING_NODE:
5282 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5283 break;
5284 case PM_INTERPOLATED_STRING_NODE:
5285 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5286 // If the string that we're concatenating is a static literal,
5287 // then we can keep the static literal flag for this string.
5288 } else {
5289 // Otherwise, we lose the static literal flag here and we should
5290 // also clear the mutability flags.
5291 CLEAR_FLAGS(node);
5292 }
5293 break;
5294 case PM_EMBEDDED_STATEMENTS_NODE: {
5295 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5296 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5297
5298 if (embedded == NULL) {
5299 // If we're embedding multiple statements or no statements, then
5300 // the string is not longer a static literal.
5301 CLEAR_FLAGS(node);
5302 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5303 // If the embedded statement is a string, then we can make that
5304 // string as frozen and static literal, and not touch the static
5305 // literal status of this string.
5306 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5307
5308 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5309 MUTABLE_FLAGS(node);
5310 }
5311 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5312 // If the embedded statement is an interpolated string, but that
5313 // string is marked as static literal, then we can keep our
5314 // static literal status for this string.
5315 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5316 MUTABLE_FLAGS(node);
5317 }
5318 } else {
5319 // In all other cases, we lose the static literal flag here and
5320 // become mutable.
5321 CLEAR_FLAGS(node);
5322 }
5323
5324 break;
5325 }
5326 case PM_EMBEDDED_VARIABLE_NODE:
5327 // Embedded variables clear static literal, which means we also
5328 // should clear the mutability flags.
5329 CLEAR_FLAGS(node);
5330 break;
5331 case PM_X_STRING_NODE:
5332 case PM_INTERPOLATED_X_STRING_NODE:
5333 // If this is an x string, then this is a syntax error. But we want
5334 // to handle it here so that we don't fail the assertion.
5335 CLEAR_FLAGS(node);
5336 break;
5337 default:
5338 assert(false && "unexpected node type");
5339 break;
5340 }
5341
5342 pm_node_list_append(&node->parts, part);
5343
5344#undef CLEAR_FLAGS
5345#undef MUTABLE_FLAGS
5346}
5347
5351static pm_interpolated_string_node_t *
5352pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5353 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5354 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5355
5356 switch (parser->frozen_string_literal) {
5357 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5358 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5359 break;
5360 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5361 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5362 break;
5363 }
5364
5365 *node = (pm_interpolated_string_node_t) {
5366 {
5367 .type = PM_INTERPOLATED_STRING_NODE,
5368 .flags = flags,
5369 .node_id = PM_NODE_IDENTIFY(parser),
5370 .location = {
5371 .start = opening->start,
5372 .end = closing->end,
5373 },
5374 },
5375 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5376 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5377 .parts = { 0 }
5378 };
5379
5380 if (parts != NULL) {
5381 pm_node_t *part;
5382 PM_NODE_LIST_FOREACH(parts, index, part) {
5383 pm_interpolated_string_node_append(node, part);
5384 }
5385 }
5386
5387 return node;
5388}
5389
5393static void
5394pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5395 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396 node->base.location.end = closing->end;
5397}
5398
5399static void
5400pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5401 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5402 node->base.location.start = part->location.start;
5403 }
5404
5405 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5406 node->base.location.end = MAX(node->base.location.end, part->location.end);
5407}
5408
5409static void
5410pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5411 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5412 node->base.location.end = closing->end;
5413}
5414
5418static pm_interpolated_symbol_node_t *
5419pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5420 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5421
5422 *node = (pm_interpolated_symbol_node_t) {
5423 {
5424 .type = PM_INTERPOLATED_SYMBOL_NODE,
5425 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5426 .node_id = PM_NODE_IDENTIFY(parser),
5427 .location = {
5428 .start = opening->start,
5429 .end = closing->end,
5430 },
5431 },
5432 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5433 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5434 .parts = { 0 }
5435 };
5436
5437 if (parts != NULL) {
5438 pm_node_t *part;
5439 PM_NODE_LIST_FOREACH(parts, index, part) {
5440 pm_interpolated_symbol_node_append(node, part);
5441 }
5442 }
5443
5444 return node;
5445}
5446
5450static pm_interpolated_x_string_node_t *
5451pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5452 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5453
5454 *node = (pm_interpolated_x_string_node_t) {
5455 {
5456 .type = PM_INTERPOLATED_X_STRING_NODE,
5457 .node_id = PM_NODE_IDENTIFY(parser),
5458 .location = {
5459 .start = opening->start,
5460 .end = closing->end
5461 },
5462 },
5463 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5464 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5465 .parts = { 0 }
5466 };
5467
5468 return node;
5469}
5470
5471static inline void
5472pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5473 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5474 node->base.location.end = part->location.end;
5475}
5476
5477static inline void
5478pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5479 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5480 node->base.location.end = closing->end;
5481}
5482
5486static pm_it_local_variable_read_node_t *
5487pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5488 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5489
5490 *node = (pm_it_local_variable_read_node_t) {
5491 {
5492 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5493 .node_id = PM_NODE_IDENTIFY(parser),
5494 .location = PM_LOCATION_TOKEN_VALUE(name)
5495 }
5496 };
5497
5498 return node;
5499}
5500
5504static pm_it_parameters_node_t *
5505pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5506 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5507
5508 *node = (pm_it_parameters_node_t) {
5509 {
5510 .type = PM_IT_PARAMETERS_NODE,
5511 .node_id = PM_NODE_IDENTIFY(parser),
5512 .location = {
5513 .start = opening->start,
5514 .end = closing->end
5515 }
5516 }
5517 };
5518
5519 return node;
5520}
5521
5525static pm_keyword_hash_node_t *
5526pm_keyword_hash_node_create(pm_parser_t *parser) {
5527 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5528
5529 *node = (pm_keyword_hash_node_t) {
5530 .base = {
5531 .type = PM_KEYWORD_HASH_NODE,
5532 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5533 .node_id = PM_NODE_IDENTIFY(parser),
5534 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5535 },
5536 .elements = { 0 }
5537 };
5538
5539 return node;
5540}
5541
5545static void
5546pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5547 // If the element being added is not an AssocNode or does not have a symbol
5548 // key, then we want to turn the SYMBOL_KEYS flag off.
5549 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5550 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5551 }
5552
5553 pm_node_list_append(&hash->elements, element);
5554 if (hash->base.location.start == NULL) {
5555 hash->base.location.start = element->location.start;
5556 }
5557 hash->base.location.end = element->location.end;
5558}
5559
5563static pm_required_keyword_parameter_node_t *
5564pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5565 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5566
5567 *node = (pm_required_keyword_parameter_node_t) {
5568 {
5569 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5570 .node_id = PM_NODE_IDENTIFY(parser),
5571 .location = {
5572 .start = name->start,
5573 .end = name->end
5574 },
5575 },
5576 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5577 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5578 };
5579
5580 return node;
5581}
5582
5586static pm_optional_keyword_parameter_node_t *
5587pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5588 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5589
5590 *node = (pm_optional_keyword_parameter_node_t) {
5591 {
5592 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5593 .node_id = PM_NODE_IDENTIFY(parser),
5594 .location = {
5595 .start = name->start,
5596 .end = value->location.end
5597 },
5598 },
5599 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5600 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5601 .value = value
5602 };
5603
5604 return node;
5605}
5606
5610static pm_keyword_rest_parameter_node_t *
5611pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5612 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5613
5614 *node = (pm_keyword_rest_parameter_node_t) {
5615 {
5616 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5617 .node_id = PM_NODE_IDENTIFY(parser),
5618 .location = {
5619 .start = operator->start,
5620 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5621 },
5622 },
5623 .name = pm_parser_optional_constant_id_token(parser, name),
5624 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5625 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5626 };
5627
5628 return node;
5629}
5630
5634static pm_lambda_node_t *
5635pm_lambda_node_create(
5636 pm_parser_t *parser,
5637 pm_constant_id_list_t *locals,
5638 const pm_token_t *operator,
5639 const pm_token_t *opening,
5640 const pm_token_t *closing,
5641 pm_node_t *parameters,
5642 pm_node_t *body
5643) {
5644 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5645
5646 *node = (pm_lambda_node_t) {
5647 {
5648 .type = PM_LAMBDA_NODE,
5649 .node_id = PM_NODE_IDENTIFY(parser),
5650 .location = {
5651 .start = operator->start,
5652 .end = closing->end
5653 },
5654 },
5655 .locals = *locals,
5656 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5657 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5658 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5659 .parameters = parameters,
5660 .body = body
5661 };
5662
5663 return node;
5664}
5665
5669static pm_local_variable_and_write_node_t *
5670pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5671 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5672 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5673 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5674
5675 *node = (pm_local_variable_and_write_node_t) {
5676 {
5677 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5678 .node_id = PM_NODE_IDENTIFY(parser),
5679 .location = {
5680 .start = target->location.start,
5681 .end = value->location.end
5682 }
5683 },
5684 .name_loc = target->location,
5685 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5686 .value = value,
5687 .name = name,
5688 .depth = depth
5689 };
5690
5691 return node;
5692}
5693
5697static pm_local_variable_operator_write_node_t *
5698pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5699 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5700
5701 *node = (pm_local_variable_operator_write_node_t) {
5702 {
5703 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5704 .node_id = PM_NODE_IDENTIFY(parser),
5705 .location = {
5706 .start = target->location.start,
5707 .end = value->location.end
5708 }
5709 },
5710 .name_loc = target->location,
5711 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5712 .value = value,
5713 .name = name,
5714 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5715 .depth = depth
5716 };
5717
5718 return node;
5719}
5720
5724static pm_local_variable_or_write_node_t *
5725pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5726 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5727 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5728 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5729
5730 *node = (pm_local_variable_or_write_node_t) {
5731 {
5732 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5733 .node_id = PM_NODE_IDENTIFY(parser),
5734 .location = {
5735 .start = target->location.start,
5736 .end = value->location.end
5737 }
5738 },
5739 .name_loc = target->location,
5740 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5741 .value = value,
5742 .name = name,
5743 .depth = depth
5744 };
5745
5746 return node;
5747}
5748
5752static pm_local_variable_read_node_t *
5753pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5754 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5755
5756 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5757
5758 *node = (pm_local_variable_read_node_t) {
5759 {
5760 .type = PM_LOCAL_VARIABLE_READ_NODE,
5761 .node_id = PM_NODE_IDENTIFY(parser),
5762 .location = PM_LOCATION_TOKEN_VALUE(name)
5763 },
5764 .name = name_id,
5765 .depth = depth
5766 };
5767
5768 return node;
5769}
5770
5774static pm_local_variable_read_node_t *
5775pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5776 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5777 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5778}
5779
5784static pm_local_variable_read_node_t *
5785pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5786 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5787 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5788}
5789
5793static pm_local_variable_write_node_t *
5794pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5795 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5796
5797 *node = (pm_local_variable_write_node_t) {
5798 {
5799 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5800 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5801 .node_id = PM_NODE_IDENTIFY(parser),
5802 .location = {
5803 .start = name_loc->start,
5804 .end = value->location.end
5805 }
5806 },
5807 .name = name,
5808 .depth = depth,
5809 .value = value,
5810 .name_loc = *name_loc,
5811 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5812 };
5813
5814 return node;
5815}
5816
/**
 * Returns true if the bytes in [start, end) are exactly the two characters
 * `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5824
/**
 * Returns true if the bytes in [start, end) look like a numbered parameter:
 * exactly two bytes, an underscore followed by a decimal digit other than 0.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;

    const uint8_t digit = start[1];
    return (digit != '0') && (pm_char_is_decimal_digit(digit));
}
5833
5838static inline void
5839pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5840 if (pm_token_is_numbered_parameter(start, end)) {
5841 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5842 }
5843}
5844
5849static pm_local_variable_target_node_t *
5850pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5851 pm_refute_numbered_parameter(parser, location->start, location->end);
5852 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5853
5854 *node = (pm_local_variable_target_node_t) {
5855 {
5856 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5857 .node_id = PM_NODE_IDENTIFY(parser),
5858 .location = *location
5859 },
5860 .name = name,
5861 .depth = depth
5862 };
5863
5864 return node;
5865}
5866
5870static pm_match_predicate_node_t *
5871pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5872 pm_assert_value_expression(parser, value);
5873
5874 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5875
5876 *node = (pm_match_predicate_node_t) {
5877 {
5878 .type = PM_MATCH_PREDICATE_NODE,
5879 .node_id = PM_NODE_IDENTIFY(parser),
5880 .location = {
5881 .start = value->location.start,
5882 .end = pattern->location.end
5883 }
5884 },
5885 .value = value,
5886 .pattern = pattern,
5887 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5888 };
5889
5890 return node;
5891}
5892
5896static pm_match_required_node_t *
5897pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5898 pm_assert_value_expression(parser, value);
5899
5900 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5901
5902 *node = (pm_match_required_node_t) {
5903 {
5904 .type = PM_MATCH_REQUIRED_NODE,
5905 .node_id = PM_NODE_IDENTIFY(parser),
5906 .location = {
5907 .start = value->location.start,
5908 .end = pattern->location.end
5909 }
5910 },
5911 .value = value,
5912 .pattern = pattern,
5913 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5914 };
5915
5916 return node;
5917}
5918
5922static pm_match_write_node_t *
5923pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5924 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5925
5926 *node = (pm_match_write_node_t) {
5927 {
5928 .type = PM_MATCH_WRITE_NODE,
5929 .node_id = PM_NODE_IDENTIFY(parser),
5930 .location = call->base.location
5931 },
5932 .call = call,
5933 .targets = { 0 }
5934 };
5935
5936 return node;
5937}
5938
5942static pm_module_node_t *
5943pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5944 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5945
5946 *node = (pm_module_node_t) {
5947 {
5948 .type = PM_MODULE_NODE,
5949 .node_id = PM_NODE_IDENTIFY(parser),
5950 .location = {
5951 .start = module_keyword->start,
5952 .end = end_keyword->end
5953 }
5954 },
5955 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5956 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5957 .constant_path = constant_path,
5958 .body = body,
5959 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5960 .name = pm_parser_constant_id_token(parser, name)
5961 };
5962
5963 return node;
5964}
5965
5969static pm_multi_target_node_t *
5970pm_multi_target_node_create(pm_parser_t *parser) {
5971 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5972
5973 *node = (pm_multi_target_node_t) {
5974 {
5975 .type = PM_MULTI_TARGET_NODE,
5976 .node_id = PM_NODE_IDENTIFY(parser),
5977 .location = { .start = NULL, .end = NULL }
5978 },
5979 .lefts = { 0 },
5980 .rest = NULL,
5981 .rights = { 0 },
5982 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5983 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5984 };
5985
5986 return node;
5987}
5988
5992static void
5993pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5994 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5995 if (node->rest == NULL) {
5996 node->rest = target;
5997 } else {
5998 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5999 pm_node_list_append(&node->rights, target);
6000 }
6001 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
6002 if (node->rest == NULL) {
6003 node->rest = target;
6004 } else {
6005 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
6006 pm_node_list_append(&node->rights, target);
6007 }
6008 } else if (node->rest == NULL) {
6009 pm_node_list_append(&node->lefts, target);
6010 } else {
6011 pm_node_list_append(&node->rights, target);
6012 }
6013
6014 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6015 node->base.location.start = target->location.start;
6016 }
6017
6018 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6019 node->base.location.end = target->location.end;
6020 }
6021}
6022
6026static void
6027pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6028 node->base.location.start = lparen->start;
6029 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6030}
6031
6035static void
6036pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6037 node->base.location.end = rparen->end;
6038 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6039}
6040
6044static pm_multi_write_node_t *
6045pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6046 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6047
6048 *node = (pm_multi_write_node_t) {
6049 {
6050 .type = PM_MULTI_WRITE_NODE,
6051 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6052 .node_id = PM_NODE_IDENTIFY(parser),
6053 .location = {
6054 .start = target->base.location.start,
6055 .end = value->location.end
6056 }
6057 },
6058 .lefts = target->lefts,
6059 .rest = target->rest,
6060 .rights = target->rights,
6061 .lparen_loc = target->lparen_loc,
6062 .rparen_loc = target->rparen_loc,
6063 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6064 .value = value
6065 };
6066
6067 // Explicitly do not call pm_node_destroy here because we want to keep
6068 // around all of the information within the MultiWriteNode node.
6069 xfree(target);
6070
6071 return node;
6072}
6073
6077static pm_next_node_t *
6078pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6079 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6080 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6081
6082 *node = (pm_next_node_t) {
6083 {
6084 .type = PM_NEXT_NODE,
6085 .node_id = PM_NODE_IDENTIFY(parser),
6086 .location = {
6087 .start = keyword->start,
6088 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6089 }
6090 },
6091 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6092 .arguments = arguments
6093 };
6094
6095 return node;
6096}
6097
6101static pm_nil_node_t *
6102pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6103 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6104 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6105
6106 *node = (pm_nil_node_t) {{
6107 .type = PM_NIL_NODE,
6108 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6109 .node_id = PM_NODE_IDENTIFY(parser),
6110 .location = PM_LOCATION_TOKEN_VALUE(token)
6111 }};
6112
6113 return node;
6114}
6115
6119static pm_no_keywords_parameter_node_t *
6120pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6121 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6122 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6123 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6124
6125 *node = (pm_no_keywords_parameter_node_t) {
6126 {
6127 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6128 .node_id = PM_NODE_IDENTIFY(parser),
6129 .location = {
6130 .start = operator->start,
6131 .end = keyword->end
6132 }
6133 },
6134 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6135 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6136 };
6137
6138 return node;
6139}
6140
6144static pm_numbered_parameters_node_t *
6145pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6146 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6147
6148 *node = (pm_numbered_parameters_node_t) {
6149 {
6150 .type = PM_NUMBERED_PARAMETERS_NODE,
6151 .node_id = PM_NODE_IDENTIFY(parser),
6152 .location = *location
6153 },
6154 .maximum = maximum
6155 };
6156
6157 return node;
6158}
6159
6164#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6165
6172static uint32_t
6173pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6174 const uint8_t *start = token->start + 1;
6175 const uint8_t *end = token->end;
6176
6177 ptrdiff_t diff = end - start;
6178 assert(diff > 0);
6179#if PTRDIFF_MAX > SIZE_MAX
6180 assert(diff < (ptrdiff_t) SIZE_MAX);
6181#endif
6182 size_t length = (size_t) diff;
6183
6184 char *digits = xcalloc(length + 1, sizeof(char));
6185 memcpy(digits, start, length);
6186 digits[length] = '\0';
6187
6188 char *endptr;
6189 errno = 0;
6190 unsigned long value = strtoul(digits, &endptr, 10);
6191
6192 if ((digits == endptr) || (*endptr != '\0')) {
6193 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6194 value = 0;
6195 }
6196
6197 xfree(digits);
6198
6199 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6200 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6201 value = 0;
6202 }
6203
6204 return (uint32_t) value;
6205}
6206
6207#undef NTH_REF_MAX
6208
6212static pm_numbered_reference_read_node_t *
6213pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6214 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6215 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6216
6217 *node = (pm_numbered_reference_read_node_t) {
6218 {
6219 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6220 .node_id = PM_NODE_IDENTIFY(parser),
6221 .location = PM_LOCATION_TOKEN_VALUE(name),
6222 },
6223 .number = pm_numbered_reference_read_node_number(parser, name)
6224 };
6225
6226 return node;
6227}
6228
6232static pm_optional_parameter_node_t *
6233pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6234 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6235
6236 *node = (pm_optional_parameter_node_t) {
6237 {
6238 .type = PM_OPTIONAL_PARAMETER_NODE,
6239 .node_id = PM_NODE_IDENTIFY(parser),
6240 .location = {
6241 .start = name->start,
6242 .end = value->location.end
6243 }
6244 },
6245 .name = pm_parser_constant_id_token(parser, name),
6246 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6247 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6248 .value = value
6249 };
6250
6251 return node;
6252}
6253
6257static pm_or_node_t *
6258pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6259 pm_assert_value_expression(parser, left);
6260
6261 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6262
6263 *node = (pm_or_node_t) {
6264 {
6265 .type = PM_OR_NODE,
6266 .node_id = PM_NODE_IDENTIFY(parser),
6267 .location = {
6268 .start = left->location.start,
6269 .end = right->location.end
6270 }
6271 },
6272 .left = left,
6273 .right = right,
6274 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6275 };
6276
6277 return node;
6278}
6279
6283static pm_parameters_node_t *
6284pm_parameters_node_create(pm_parser_t *parser) {
6285 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6286
6287 *node = (pm_parameters_node_t) {
6288 {
6289 .type = PM_PARAMETERS_NODE,
6290 .node_id = PM_NODE_IDENTIFY(parser),
6291 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6292 },
6293 .rest = NULL,
6294 .keyword_rest = NULL,
6295 .block = NULL,
6296 .requireds = { 0 },
6297 .optionals = { 0 },
6298 .posts = { 0 },
6299 .keywords = { 0 }
6300 };
6301
6302 return node;
6303}
6304
6308static void
6309pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6310 if (params->base.location.start == NULL) {
6311 params->base.location.start = param->location.start;
6312 } else {
6313 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6314 }
6315
6316 if (params->base.location.end == NULL) {
6317 params->base.location.end = param->location.end;
6318 } else {
6319 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6320 }
6321}
6322
6326static void
6327pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6328 pm_parameters_node_location_set(params, param);
6329 pm_node_list_append(&params->requireds, param);
6330}
6331
6335static void
6336pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6337 pm_parameters_node_location_set(params, (pm_node_t *) param);
6338 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6339}
6340
6344static void
6345pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6346 pm_parameters_node_location_set(params, param);
6347 pm_node_list_append(&params->posts, param);
6348}
6349
6353static void
6354pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6355 pm_parameters_node_location_set(params, param);
6356 params->rest = param;
6357}
6358
6362static void
6363pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6364 pm_parameters_node_location_set(params, param);
6365 pm_node_list_append(&params->keywords, param);
6366}
6367
6371static void
6372pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6373 assert(params->keyword_rest == NULL);
6374 pm_parameters_node_location_set(params, param);
6375 params->keyword_rest = param;
6376}
6377
6381static void
6382pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6383 assert(params->block == NULL);
6384 pm_parameters_node_location_set(params, (pm_node_t *) param);
6385 params->block = param;
6386}
6387
6391static pm_program_node_t *
6392pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6393 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6394
6395 *node = (pm_program_node_t) {
6396 {
6397 .type = PM_PROGRAM_NODE,
6398 .node_id = PM_NODE_IDENTIFY(parser),
6399 .location = {
6400 .start = statements == NULL ? parser->start : statements->base.location.start,
6401 .end = statements == NULL ? parser->end : statements->base.location.end
6402 }
6403 },
6404 .locals = *locals,
6405 .statements = statements
6406 };
6407
6408 return node;
6409}
6410
6414static pm_parentheses_node_t *
6415pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6416 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6417
6418 *node = (pm_parentheses_node_t) {
6419 {
6420 .type = PM_PARENTHESES_NODE,
6421 .flags = flags,
6422 .node_id = PM_NODE_IDENTIFY(parser),
6423 .location = {
6424 .start = opening->start,
6425 .end = closing->end
6426 }
6427 },
6428 .body = body,
6429 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6430 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6431 };
6432
6433 return node;
6434}
6435
6439static pm_pinned_expression_node_t *
6440pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6441 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6442
6443 *node = (pm_pinned_expression_node_t) {
6444 {
6445 .type = PM_PINNED_EXPRESSION_NODE,
6446 .node_id = PM_NODE_IDENTIFY(parser),
6447 .location = {
6448 .start = operator->start,
6449 .end = rparen->end
6450 }
6451 },
6452 .expression = expression,
6453 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6454 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6455 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6456 };
6457
6458 return node;
6459}
6460
6464static pm_pinned_variable_node_t *
6465pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6466 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6467
6468 *node = (pm_pinned_variable_node_t) {
6469 {
6470 .type = PM_PINNED_VARIABLE_NODE,
6471 .node_id = PM_NODE_IDENTIFY(parser),
6472 .location = {
6473 .start = operator->start,
6474 .end = variable->location.end
6475 }
6476 },
6477 .variable = variable,
6478 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6479 };
6480
6481 return node;
6482}
6483
6487static pm_post_execution_node_t *
6488pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6489 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6490
6491 *node = (pm_post_execution_node_t) {
6492 {
6493 .type = PM_POST_EXECUTION_NODE,
6494 .node_id = PM_NODE_IDENTIFY(parser),
6495 .location = {
6496 .start = keyword->start,
6497 .end = closing->end
6498 }
6499 },
6500 .statements = statements,
6501 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6502 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6503 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6504 };
6505
6506 return node;
6507}
6508
6512static pm_pre_execution_node_t *
6513pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6514 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6515
6516 *node = (pm_pre_execution_node_t) {
6517 {
6518 .type = PM_PRE_EXECUTION_NODE,
6519 .node_id = PM_NODE_IDENTIFY(parser),
6520 .location = {
6521 .start = keyword->start,
6522 .end = closing->end
6523 }
6524 },
6525 .statements = statements,
6526 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6527 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6528 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6529 };
6530
6531 return node;
6532}
6533
6537static pm_range_node_t *
6538pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6539 pm_assert_value_expression(parser, left);
6540 pm_assert_value_expression(parser, right);
6541
6542 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6543 pm_node_flags_t flags = 0;
6544
6545 // Indicate that this node is an exclusive range if the operator is `...`.
6546 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6547 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6548 }
6549
6550 // Indicate that this node is a static literal (i.e., can be compiled with
6551 // a putobject in CRuby) if the left and right are implicit nil, explicit
6552 // nil, or integers.
6553 if (
6554 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6555 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6556 ) {
6557 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6558 }
6559
6560 *node = (pm_range_node_t) {
6561 {
6562 .type = PM_RANGE_NODE,
6563 .flags = flags,
6564 .node_id = PM_NODE_IDENTIFY(parser),
6565 .location = {
6566 .start = (left == NULL ? operator->start : left->location.start),
6567 .end = (right == NULL ? operator->end : right->location.end)
6568 }
6569 },
6570 .left = left,
6571 .right = right,
6572 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6573 };
6574
6575 return node;
6576}
6577
6581static pm_redo_node_t *
6582pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6583 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6584 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6585
6586 *node = (pm_redo_node_t) {{
6587 .type = PM_REDO_NODE,
6588 .node_id = PM_NODE_IDENTIFY(parser),
6589 .location = PM_LOCATION_TOKEN_VALUE(token)
6590 }};
6591
6592 return node;
6593}
6594
6599static pm_regular_expression_node_t *
6600pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6601 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6602
6603 *node = (pm_regular_expression_node_t) {
6604 {
6605 .type = PM_REGULAR_EXPRESSION_NODE,
6606 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6607 .node_id = PM_NODE_IDENTIFY(parser),
6608 .location = {
6609 .start = MIN(opening->start, closing->start),
6610 .end = MAX(opening->end, closing->end)
6611 }
6612 },
6613 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6614 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6615 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6616 .unescaped = *unescaped
6617 };
6618
6619 return node;
6620}
6621
6625static inline pm_regular_expression_node_t *
6626pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6627 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6628}
6629
6633static pm_required_parameter_node_t *
6634pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6635 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6636
6637 *node = (pm_required_parameter_node_t) {
6638 {
6639 .type = PM_REQUIRED_PARAMETER_NODE,
6640 .node_id = PM_NODE_IDENTIFY(parser),
6641 .location = PM_LOCATION_TOKEN_VALUE(token)
6642 },
6643 .name = pm_parser_constant_id_token(parser, token)
6644 };
6645
6646 return node;
6647}
6648
6652static pm_rescue_modifier_node_t *
6653pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6654 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6655
6656 *node = (pm_rescue_modifier_node_t) {
6657 {
6658 .type = PM_RESCUE_MODIFIER_NODE,
6659 .node_id = PM_NODE_IDENTIFY(parser),
6660 .location = {
6661 .start = expression->location.start,
6662 .end = rescue_expression->location.end
6663 }
6664 },
6665 .expression = expression,
6666 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667 .rescue_expression = rescue_expression
6668 };
6669
6670 return node;
6671}
6672
6676static pm_rescue_node_t *
6677pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6678 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6679
6680 *node = (pm_rescue_node_t) {
6681 {
6682 .type = PM_RESCUE_NODE,
6683 .node_id = PM_NODE_IDENTIFY(parser),
6684 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6685 },
6686 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6687 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6688 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6689 .reference = NULL,
6690 .statements = NULL,
6691 .subsequent = NULL,
6692 .exceptions = { 0 }
6693 };
6694
6695 return node;
6696}
6697
6698static inline void
6699pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6700 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6701}
6702
6706static void
6707pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6708 node->reference = reference;
6709 node->base.location.end = reference->location.end;
6710}
6711
6715static void
6716pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6717 node->statements = statements;
6718 if (pm_statements_node_body_length(statements) > 0) {
6719 node->base.location.end = statements->base.location.end;
6720 }
6721}
6722
6726static void
6727pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6728 node->subsequent = subsequent;
6729 node->base.location.end = subsequent->base.location.end;
6730}
6731
6735static void
6736pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6737 pm_node_list_append(&node->exceptions, exception);
6738 node->base.location.end = exception->location.end;
6739}
6740
6744static pm_rest_parameter_node_t *
6745pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6746 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6747
6748 *node = (pm_rest_parameter_node_t) {
6749 {
6750 .type = PM_REST_PARAMETER_NODE,
6751 .node_id = PM_NODE_IDENTIFY(parser),
6752 .location = {
6753 .start = operator->start,
6754 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6755 }
6756 },
6757 .name = pm_parser_optional_constant_id_token(parser, name),
6758 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6759 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6760 };
6761
6762 return node;
6763}
6764
6768static pm_retry_node_t *
6769pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6770 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6771 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6772
6773 *node = (pm_retry_node_t) {{
6774 .type = PM_RETRY_NODE,
6775 .node_id = PM_NODE_IDENTIFY(parser),
6776 .location = PM_LOCATION_TOKEN_VALUE(token)
6777 }};
6778
6779 return node;
6780}
6781
6785static pm_return_node_t *
6786pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6787 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6788
6789 *node = (pm_return_node_t) {
6790 {
6791 .type = PM_RETURN_NODE,
6792 .node_id = PM_NODE_IDENTIFY(parser),
6793 .location = {
6794 .start = keyword->start,
6795 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6796 }
6797 },
6798 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6799 .arguments = arguments
6800 };
6801
6802 return node;
6803}
6804
6808static pm_self_node_t *
6809pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6810 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6811 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6812
6813 *node = (pm_self_node_t) {{
6814 .type = PM_SELF_NODE,
6815 .node_id = PM_NODE_IDENTIFY(parser),
6816 .location = PM_LOCATION_TOKEN_VALUE(token)
6817 }};
6818
6819 return node;
6820}
6821
6825static pm_shareable_constant_node_t *
6826pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6827 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6828
6829 *node = (pm_shareable_constant_node_t) {
6830 {
6831 .type = PM_SHAREABLE_CONSTANT_NODE,
6832 .flags = (pm_node_flags_t) value,
6833 .node_id = PM_NODE_IDENTIFY(parser),
6834 .location = PM_LOCATION_NODE_VALUE(write)
6835 },
6836 .write = write
6837 };
6838
6839 return node;
6840}
6841
6845static pm_singleton_class_node_t *
6846pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6847 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6848
6849 *node = (pm_singleton_class_node_t) {
6850 {
6851 .type = PM_SINGLETON_CLASS_NODE,
6852 .node_id = PM_NODE_IDENTIFY(parser),
6853 .location = {
6854 .start = class_keyword->start,
6855 .end = end_keyword->end
6856 }
6857 },
6858 .locals = *locals,
6859 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6860 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6861 .expression = expression,
6862 .body = body,
6863 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6864 };
6865
6866 return node;
6867}
6868
6872static pm_source_encoding_node_t *
6873pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6874 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6875 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6876
6877 *node = (pm_source_encoding_node_t) {{
6878 .type = PM_SOURCE_ENCODING_NODE,
6879 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6880 .node_id = PM_NODE_IDENTIFY(parser),
6881 .location = PM_LOCATION_TOKEN_VALUE(token)
6882 }};
6883
6884 return node;
6885}
6886
6890static pm_source_file_node_t*
6891pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6892 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6893 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6894
6895 pm_node_flags_t flags = 0;
6896
6897 switch (parser->frozen_string_literal) {
6898 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6899 flags |= PM_STRING_FLAGS_MUTABLE;
6900 break;
6901 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6902 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6903 break;
6904 }
6905
6906 *node = (pm_source_file_node_t) {
6907 {
6908 .type = PM_SOURCE_FILE_NODE,
6909 .flags = flags,
6910 .node_id = PM_NODE_IDENTIFY(parser),
6911 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6912 },
6913 .filepath = parser->filepath
6914 };
6915
6916 return node;
6917}
6918
6922static pm_source_line_node_t *
6923pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6924 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6925 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6926
6927 *node = (pm_source_line_node_t) {{
6928 .type = PM_SOURCE_LINE_NODE,
6929 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6930 .node_id = PM_NODE_IDENTIFY(parser),
6931 .location = PM_LOCATION_TOKEN_VALUE(token)
6932 }};
6933
6934 return node;
6935}
6936
6940static pm_splat_node_t *
6941pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6942 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6943
6944 *node = (pm_splat_node_t) {
6945 {
6946 .type = PM_SPLAT_NODE,
6947 .node_id = PM_NODE_IDENTIFY(parser),
6948 .location = {
6949 .start = operator->start,
6950 .end = (expression == NULL ? operator->end : expression->location.end)
6951 }
6952 },
6953 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6954 .expression = expression
6955 };
6956
6957 return node;
6958}
6959
6963static pm_statements_node_t *
6964pm_statements_node_create(pm_parser_t *parser) {
6965 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6966
6967 *node = (pm_statements_node_t) {
6968 {
6969 .type = PM_STATEMENTS_NODE,
6970 .node_id = PM_NODE_IDENTIFY(parser),
6971 .location = PM_LOCATION_NULL_VALUE(parser)
6972 },
6973 .body = { 0 }
6974 };
6975
6976 return node;
6977}
6978
6982static size_t
6983pm_statements_node_body_length(pm_statements_node_t *node) {
6984 return node && node->body.size;
6985}
6986
6990static void
6991pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6992 node->base.location = (pm_location_t) { .start = start, .end = end };
6993}
6994
6999static inline void
7000pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
7001 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
7002 node->base.location.start = statement->location.start;
7003 }
7004
7005 if (statement->location.end > node->base.location.end) {
7006 node->base.location.end = statement->location.end;
7007 }
7008}
7009
7013static void
7014pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7015 pm_statements_node_body_update(node, statement);
7016
7017 if (node->body.size > 0) {
7018 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7019
7020 switch (PM_NODE_TYPE(previous)) {
7021 case PM_BREAK_NODE:
7022 case PM_NEXT_NODE:
7023 case PM_REDO_NODE:
7024 case PM_RETRY_NODE:
7025 case PM_RETURN_NODE:
7026 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7027 break;
7028 default:
7029 break;
7030 }
7031 }
7032
7033 pm_node_list_append(&node->body, statement);
7034 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7035}
7036
7040static void
7041pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7042 pm_statements_node_body_update(node, statement);
7043 pm_node_list_prepend(&node->body, statement);
7044 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7045}
7046
7050static inline pm_string_node_t *
7051pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7052 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7053 pm_node_flags_t flags = 0;
7054
7055 switch (parser->frozen_string_literal) {
7056 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7057 flags = PM_STRING_FLAGS_MUTABLE;
7058 break;
7059 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7060 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7061 break;
7062 }
7063
7064 *node = (pm_string_node_t) {
7065 {
7066 .type = PM_STRING_NODE,
7067 .flags = flags,
7068 .node_id = PM_NODE_IDENTIFY(parser),
7069 .location = {
7070 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7071 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7072 }
7073 },
7074 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7075 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7076 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7077 .unescaped = *string
7078 };
7079
7080 return node;
7081}
7082
7086static pm_string_node_t *
7087pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7088 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7089}
7090
7095static pm_string_node_t *
7096pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7097 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7098 parser->current_string = PM_STRING_EMPTY;
7099 return node;
7100}
7101
7105static pm_super_node_t *
7106pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7107 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7108 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7109
7110 const uint8_t *end = pm_arguments_end(arguments);
7111 if (end == NULL) {
7112 assert(false && "unreachable");
7113 }
7114
7115 *node = (pm_super_node_t) {
7116 {
7117 .type = PM_SUPER_NODE,
7118 .node_id = PM_NODE_IDENTIFY(parser),
7119 .location = {
7120 .start = keyword->start,
7121 .end = end,
7122 }
7123 },
7124 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7125 .lparen_loc = arguments->opening_loc,
7126 .arguments = arguments->arguments,
7127 .rparen_loc = arguments->closing_loc,
7128 .block = arguments->block
7129 };
7130
7131 return node;
7132}
7133
7138static bool
7139pm_ascii_only_p(const pm_string_t *contents) {
7140 const size_t length = pm_string_length(contents);
7141 const uint8_t *source = pm_string_source(contents);
7142
7143 for (size_t index = 0; index < length; index++) {
7144 if (source[index] & 0x80) return false;
7145 }
7146
7147 return true;
7148}
7149
7153static void
7154parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7155 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7156 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7157
7158 if (width == 0) {
7159 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7160 break;
7161 }
7162
7163 cursor += width;
7164 }
7165}
7166
7171static void
7172parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7173 const pm_encoding_t *encoding = parser->encoding;
7174
7175 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7176 size_t width = encoding->char_width(cursor, end - cursor);
7177
7178 if (width == 0) {
7179 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7180 break;
7181 }
7182
7183 cursor += width;
7184 }
7185}
7186
7196static inline pm_node_flags_t
7197parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7198 if (parser->explicit_encoding != NULL) {
7199 // A Symbol may optionally have its encoding explicitly set. This will
7200 // happen if an escape sequence results in a non-ASCII code point.
7201 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7202 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7203 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7204 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7205 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7206 } else if (validate) {
7207 parse_symbol_encoding_validate_other(parser, location, contents);
7208 }
7209 } else if (pm_ascii_only_p(contents)) {
7210 // Ruby stipulates that all source files must use an ASCII-compatible
7211 // encoding. Thus, all symbols appearing in source are eligible for
7212 // "downgrading" to US-ASCII.
7213 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7214 } else if (validate) {
7215 parse_symbol_encoding_validate_other(parser, location, contents);
7216 }
7217
7218 return 0;
7219}
7220
7221static pm_node_flags_t
7222parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7223 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7224 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7225 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7226 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7227
7228 // There's special validation logic used if a string does not contain any character escape sequences.
7229 if (parser->explicit_encoding == NULL) {
7230 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7231 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7232 // the US-ASCII encoding.
7233 if (ascii_only) {
7234 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7235 }
7236
7237 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7238 if (!ascii_only) {
7239 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7240 }
7241 } else if (parser->encoding != modifier_encoding) {
7242 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7243
7244 if (modifier == 'n' && !ascii_only) {
7245 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7246 }
7247 }
7248
7249 return flags;
7250 }
7251
7252 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7253 bool mixed_encoding = false;
7254
7255 if (mixed_encoding) {
7256 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7257 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7258 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7259 bool valid_string_in_modifier_encoding = true;
7260
7261 if (!valid_string_in_modifier_encoding) {
7262 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7263 }
7264 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7265 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7266 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7267 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7268 }
7269 }
7270
7271 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7272 return flags;
7273}
7274
7281static pm_node_flags_t
7282parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7283 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7284 bool valid_unicode_range = true;
7285 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7286 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7287 return flags;
7288 }
7289
7290 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7291 // to multi-byte characters are allowed.
7292 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7293 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7294 // following error message appearing twice. We do the same for compatibility.
7295 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7296 }
7297
7306 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7307 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7308 }
7309
7310 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7311 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7312 }
7313
7314 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7315 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7316 }
7317
7318 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7319 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7320 }
7321
7322 // At this point no encoding modifiers will be present on the regular expression as they would have already
7323 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7324 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7325 if (ascii_only) {
7326 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7327 }
7328
7329 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7330 // or by specifying a modifier.
7331 //
7332 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7333 if (parser->explicit_encoding != NULL) {
7334 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7335 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7336 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7337 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7338 }
7339 }
7340
7341 return 0;
7342}
7343
7348static pm_symbol_node_t *
7349pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7350 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7351
7352 *node = (pm_symbol_node_t) {
7353 {
7354 .type = PM_SYMBOL_NODE,
7355 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7356 .node_id = PM_NODE_IDENTIFY(parser),
7357 .location = {
7358 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7359 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7360 }
7361 },
7362 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7363 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7364 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7365 .unescaped = *unescaped
7366 };
7367
7368 return node;
7369}
7370
7374static inline pm_symbol_node_t *
7375pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7376 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7377}
7378
7382static pm_symbol_node_t *
7383pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7384 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7385 parser->current_string = PM_STRING_EMPTY;
7386 return node;
7387}
7388
7392static pm_symbol_node_t *
7393pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7394 pm_symbol_node_t *node;
7395
7396 switch (token->type) {
7397 case PM_TOKEN_LABEL: {
7398 pm_token_t opening = not_provided(parser);
7399 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7400
7401 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7402 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7403
7404 assert((label.end - label.start) >= 0);
7405 pm_string_shared_init(&node->unescaped, label.start, label.end);
7406 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7407
7408 break;
7409 }
7410 case PM_TOKEN_MISSING: {
7411 pm_token_t opening = not_provided(parser);
7412 pm_token_t closing = not_provided(parser);
7413
7414 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7415 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7416 break;
7417 }
7418 default:
7419 assert(false && "unreachable");
7420 node = NULL;
7421 break;
7422 }
7423
7424 return node;
7425}
7426
7430static pm_symbol_node_t *
7431pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7432 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7433
7434 *node = (pm_symbol_node_t) {
7435 {
7436 .type = PM_SYMBOL_NODE,
7437 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7438 .node_id = PM_NODE_IDENTIFY(parser),
7439 .location = PM_LOCATION_NULL_VALUE(parser)
7440 },
7441 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7442 .unescaped = { 0 }
7443 };
7444
7445 pm_string_constant_init(&node->unescaped, content, strlen(content));
7446 return node;
7447}
7448
7452static bool
7453pm_symbol_node_label_p(pm_node_t *node) {
7454 const uint8_t *end = NULL;
7455
7456 switch (PM_NODE_TYPE(node)) {
7457 case PM_SYMBOL_NODE:
7458 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7459 break;
7460 case PM_INTERPOLATED_SYMBOL_NODE:
7461 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7462 break;
7463 default:
7464 return false;
7465 }
7466
7467 return (end != NULL) && (end[-1] == ':');
7468}
7469
7473static pm_symbol_node_t *
7474pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7475 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7476
7477 *new_node = (pm_symbol_node_t) {
7478 {
7479 .type = PM_SYMBOL_NODE,
7480 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7481 .node_id = PM_NODE_IDENTIFY(parser),
7482 .location = {
7483 .start = opening->start,
7484 .end = closing->end
7485 }
7486 },
7487 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7488 .value_loc = node->content_loc,
7489 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7490 .unescaped = node->unescaped
7491 };
7492
7493 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7494 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7495
7496 // We are explicitly _not_ using pm_node_destroy here because we don't want
7497 // to trash the unescaped string. We could instead copy the string if we
7498 // know that it is owned, but we're taking the fast path for now.
7499 xfree(node);
7500
7501 return new_node;
7502}
7503
7507static pm_string_node_t *
7508pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7509 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7510 pm_node_flags_t flags = 0;
7511
7512 switch (parser->frozen_string_literal) {
7513 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7514 flags = PM_STRING_FLAGS_MUTABLE;
7515 break;
7516 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7517 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7518 break;
7519 }
7520
7521 *new_node = (pm_string_node_t) {
7522 {
7523 .type = PM_STRING_NODE,
7524 .flags = flags,
7525 .node_id = PM_NODE_IDENTIFY(parser),
7526 .location = node->base.location
7527 },
7528 .opening_loc = node->opening_loc,
7529 .content_loc = node->value_loc,
7530 .closing_loc = node->closing_loc,
7531 .unescaped = node->unescaped
7532 };
7533
7534 // We are explicitly _not_ using pm_node_destroy here because we don't want
7535 // to trash the unescaped string. We could instead copy the string if we
7536 // know that it is owned, but we're taking the fast path for now.
7537 xfree(node);
7538
7539 return new_node;
7540}
7541
7545static pm_true_node_t *
7546pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7547 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7548 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7549
7550 *node = (pm_true_node_t) {{
7551 .type = PM_TRUE_NODE,
7552 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7553 .node_id = PM_NODE_IDENTIFY(parser),
7554 .location = PM_LOCATION_TOKEN_VALUE(token)
7555 }};
7556
7557 return node;
7558}
7559
7563static pm_true_node_t *
7564pm_true_node_synthesized_create(pm_parser_t *parser) {
7565 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7566
7567 *node = (pm_true_node_t) {{
7568 .type = PM_TRUE_NODE,
7569 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7570 .node_id = PM_NODE_IDENTIFY(parser),
7571 .location = { .start = parser->start, .end = parser->end }
7572 }};
7573
7574 return node;
7575}
7576
7580static pm_undef_node_t *
7581pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7582 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7583 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7584
7585 *node = (pm_undef_node_t) {
7586 {
7587 .type = PM_UNDEF_NODE,
7588 .node_id = PM_NODE_IDENTIFY(parser),
7589 .location = PM_LOCATION_TOKEN_VALUE(token),
7590 },
7591 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7592 .names = { 0 }
7593 };
7594
7595 return node;
7596}
7597
7601static void
7602pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7603 node->base.location.end = name->location.end;
7604 pm_node_list_append(&node->names, name);
7605}
7606
7610static pm_unless_node_t *
7611pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7612 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7613 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7614
7615 const uint8_t *end;
7616 if (statements != NULL) {
7617 end = statements->base.location.end;
7618 } else {
7619 end = predicate->location.end;
7620 }
7621
7622 *node = (pm_unless_node_t) {
7623 {
7624 .type = PM_UNLESS_NODE,
7625 .flags = PM_NODE_FLAG_NEWLINE,
7626 .node_id = PM_NODE_IDENTIFY(parser),
7627 .location = {
7628 .start = keyword->start,
7629 .end = end
7630 },
7631 },
7632 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7633 .predicate = predicate,
7634 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7635 .statements = statements,
7636 .else_clause = NULL,
7637 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7638 };
7639
7640 return node;
7641}
7642
7646static pm_unless_node_t *
7647pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7648 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7649 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7650
7651 pm_statements_node_t *statements = pm_statements_node_create(parser);
7652 pm_statements_node_body_append(parser, statements, statement, true);
7653
7654 *node = (pm_unless_node_t) {
7655 {
7656 .type = PM_UNLESS_NODE,
7657 .flags = PM_NODE_FLAG_NEWLINE,
7658 .node_id = PM_NODE_IDENTIFY(parser),
7659 .location = {
7660 .start = statement->location.start,
7661 .end = predicate->location.end
7662 },
7663 },
7664 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7665 .predicate = predicate,
7666 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7667 .statements = statements,
7668 .else_clause = NULL,
7669 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7670 };
7671
7672 return node;
7673}
7674
7675static inline void
7676pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7677 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7678 node->base.location.end = end_keyword->end;
7679}
7680
7686static void
7687pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7688 assert(parser->current_block_exits != NULL);
7689
7690 // All of the block exits that we want to remove should be within the
7691 // statements, and since we are modifying the statements, we shouldn't have
7692 // to check the end location.
7693 const uint8_t *start = statements->base.location.start;
7694
7695 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7696 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7697 if (block_exit->location.start < start) break;
7698
7699 // Implicitly remove from the list by lowering the size.
7700 parser->current_block_exits->size--;
7701 }
7702}
7703
7707static pm_until_node_t *
7708pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7709 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7710 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7711
7712 *node = (pm_until_node_t) {
7713 {
7714 .type = PM_UNTIL_NODE,
7715 .flags = flags,
7716 .node_id = PM_NODE_IDENTIFY(parser),
7717 .location = {
7718 .start = keyword->start,
7719 .end = closing->end,
7720 },
7721 },
7722 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7723 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7724 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7725 .predicate = predicate,
7726 .statements = statements
7727 };
7728
7729 return node;
7730}
7731
7735static pm_until_node_t *
7736pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7737 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7738 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7739 pm_loop_modifier_block_exits(parser, statements);
7740
7741 *node = (pm_until_node_t) {
7742 {
7743 .type = PM_UNTIL_NODE,
7744 .flags = flags,
7745 .node_id = PM_NODE_IDENTIFY(parser),
7746 .location = {
7747 .start = statements->base.location.start,
7748 .end = predicate->location.end,
7749 },
7750 },
7751 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7752 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7753 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7754 .predicate = predicate,
7755 .statements = statements
7756 };
7757
7758 return node;
7759}
7760
7764static pm_when_node_t *
7765pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7766 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7767
7768 *node = (pm_when_node_t) {
7769 {
7770 .type = PM_WHEN_NODE,
7771 .node_id = PM_NODE_IDENTIFY(parser),
7772 .location = {
7773 .start = keyword->start,
7774 .end = NULL
7775 }
7776 },
7777 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7778 .statements = NULL,
7779 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7780 .conditions = { 0 }
7781 };
7782
7783 return node;
7784}
7785
7789static void
7790pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7791 node->base.location.end = condition->location.end;
7792 pm_node_list_append(&node->conditions, condition);
7793}
7794
7798static inline void
7799pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7800 node->base.location.end = then_keyword->end;
7801 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7802}
7803
7807static void
7808pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7809 if (statements->base.location.end > node->base.location.end) {
7810 node->base.location.end = statements->base.location.end;
7811 }
7812
7813 node->statements = statements;
7814}
7815
7819static pm_while_node_t *
7820pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7821 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7822 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7823
7824 *node = (pm_while_node_t) {
7825 {
7826 .type = PM_WHILE_NODE,
7827 .flags = flags,
7828 .node_id = PM_NODE_IDENTIFY(parser),
7829 .location = {
7830 .start = keyword->start,
7831 .end = closing->end
7832 },
7833 },
7834 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7835 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7836 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7837 .predicate = predicate,
7838 .statements = statements
7839 };
7840
7841 return node;
7842}
7843
7847static pm_while_node_t *
7848pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7849 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7850 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7851 pm_loop_modifier_block_exits(parser, statements);
7852
7853 *node = (pm_while_node_t) {
7854 {
7855 .type = PM_WHILE_NODE,
7856 .flags = flags,
7857 .node_id = PM_NODE_IDENTIFY(parser),
7858 .location = {
7859 .start = statements->base.location.start,
7860 .end = predicate->location.end
7861 },
7862 },
7863 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7864 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7865 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7866 .predicate = predicate,
7867 .statements = statements
7868 };
7869
7870 return node;
7871}
7872
7876static pm_while_node_t *
7877pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7878 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7879
7880 *node = (pm_while_node_t) {
7881 {
7882 .type = PM_WHILE_NODE,
7883 .node_id = PM_NODE_IDENTIFY(parser),
7884 .location = PM_LOCATION_NULL_VALUE(parser)
7885 },
7886 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7887 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7888 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7889 .predicate = predicate,
7890 .statements = statements
7891 };
7892
7893 return node;
7894}
7895
7900static pm_x_string_node_t *
7901pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7902 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7903
7904 *node = (pm_x_string_node_t) {
7905 {
7906 .type = PM_X_STRING_NODE,
7907 .flags = PM_STRING_FLAGS_FROZEN,
7908 .node_id = PM_NODE_IDENTIFY(parser),
7909 .location = {
7910 .start = opening->start,
7911 .end = closing->end
7912 },
7913 },
7914 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7915 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7916 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7917 .unescaped = *unescaped
7918 };
7919
7920 return node;
7921}
7922
7926static inline pm_x_string_node_t *
7927pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7928 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7929}
7930
7934static pm_yield_node_t *
7935pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7936 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7937
7938 const uint8_t *end;
7939 if (rparen_loc->start != NULL) {
7940 end = rparen_loc->end;
7941 } else if (arguments != NULL) {
7942 end = arguments->base.location.end;
7943 } else if (lparen_loc->start != NULL) {
7944 end = lparen_loc->end;
7945 } else {
7946 end = keyword->end;
7947 }
7948
7949 *node = (pm_yield_node_t) {
7950 {
7951 .type = PM_YIELD_NODE,
7952 .node_id = PM_NODE_IDENTIFY(parser),
7953 .location = {
7954 .start = keyword->start,
7955 .end = end
7956 },
7957 },
7958 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7959 .lparen_loc = *lparen_loc,
7960 .arguments = arguments,
7961 .rparen_loc = *rparen_loc
7962 };
7963
7964 return node;
7965}
7966
7967#undef PM_NODE_ALLOC
7968#undef PM_NODE_IDENTIFY
7969
7974static int
7975pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7976 pm_scope_t *scope = parser->current_scope;
7977 int depth = 0;
7978
7979 while (scope != NULL) {
7980 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7981 if (scope->closed) break;
7982
7983 scope = scope->previous;
7984 depth++;
7985 }
7986
7987 return -1;
7988}
7989
7995static inline int
7996pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7997 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7998}
7999
8003static inline void
8004pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8005 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
8006}
8007
8011static pm_constant_id_t
8012pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
8013 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
8014 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
8015 return constant_id;
8016}
8017
8021static inline pm_constant_id_t
8022pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8023 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8024}
8025
8029static pm_constant_id_t
8030pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8031 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8032 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8033 return constant_id;
8034}
8035
8039static pm_constant_id_t
8040pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8041 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8042 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8043 return constant_id;
8044}
8045
8053static bool
8054pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8055 // We want to check whether the parameter name is a numbered parameter or
8056 // not.
8057 pm_refute_numbered_parameter(parser, name->start, name->end);
8058
8059 // Otherwise we'll fetch the constant id for the parameter name and check
8060 // whether it's already in the current scope.
8061 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8062
8063 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8064 // Add an error if the parameter doesn't start with _ and has been seen before
8065 if ((name->start < name->end) && (*name->start != '_')) {
8066 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8067 }
8068 return true;
8069 }
8070 return false;
8071}
8072
8076static void
8077pm_parser_scope_pop(pm_parser_t *parser) {
8078 pm_scope_t *scope = parser->current_scope;
8079 parser->current_scope = scope->previous;
8080 pm_locals_free(&scope->locals);
8081 pm_node_list_free(&scope->implicit_parameters);
8082 xfree(scope);
8083}
8084
8085/******************************************************************************/
8086/* Stack helpers */
8087/******************************************************************************/
8088
8092static inline void
8093pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8094 *stack = (*stack << 1) | (value & 1);
8095}
8096
8100static inline void
8101pm_state_stack_pop(pm_state_stack_t *stack) {
8102 *stack >>= 1;
8103}
8104
8108static inline bool
8109pm_state_stack_p(const pm_state_stack_t *stack) {
8110 return *stack & 1;
8111}
8112
8113static inline void
8114pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8115 // Use the negation of the value to prevent stack overflow.
8116 pm_state_stack_push(&parser->accepts_block_stack, !value);
8117}
8118
8119static inline void
8120pm_accepts_block_stack_pop(pm_parser_t *parser) {
8121 pm_state_stack_pop(&parser->accepts_block_stack);
8122}
8123
8124static inline bool
8125pm_accepts_block_stack_p(pm_parser_t *parser) {
8126 return !pm_state_stack_p(&parser->accepts_block_stack);
8127}
8128
8129static inline void
8130pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8131 pm_state_stack_push(&parser->do_loop_stack, value);
8132}
8133
8134static inline void
8135pm_do_loop_stack_pop(pm_parser_t *parser) {
8136 pm_state_stack_pop(&parser->do_loop_stack);
8137}
8138
8139static inline bool
8140pm_do_loop_stack_p(pm_parser_t *parser) {
8141 return pm_state_stack_p(&parser->do_loop_stack);
8142}
8143
8144/******************************************************************************/
8145/* Lexer check helpers */
8146/******************************************************************************/
8147
8152static inline uint8_t
8153peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8154 if (cursor < parser->end) {
8155 return *cursor;
8156 } else {
8157 return '\0';
8158 }
8159}
8160
8166static inline uint8_t
8167peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8168 return peek_at(parser, parser->current.end + offset);
8169}
8170
8175static inline uint8_t
8176peek(const pm_parser_t *parser) {
8177 return peek_at(parser, parser->current.end);
8178}
8179
8184static inline bool
8185match(pm_parser_t *parser, uint8_t value) {
8186 if (peek(parser) == value) {
8187 parser->current.end++;
8188 return true;
8189 }
8190 return false;
8191}
8192
8197static inline size_t
8198match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8199 if (peek_at(parser, cursor) == '\n') {
8200 return 1;
8201 }
8202 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8203 return 2;
8204 }
8205 return 0;
8206}
8207
8213static inline size_t
8214match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8215 return match_eol_at(parser, parser->current.end + offset);
8216}
8217
8223static inline size_t
8224match_eol(pm_parser_t *parser) {
8225 return match_eol_at(parser, parser->current.end);
8226}
8227
/**
 * Return a pointer to the first '\n' within the `length` bytes that start at
 * the cursor, or NULL if there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // Searching for the raw \n byte with memchr is fine here because none
    // of the supported encodings use \n as a component of a multi-byte
    // character.
    return memchr(cursor, '\n', span);
}
8240
8244static inline bool
8245ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8246 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8247}
8248
8253static bool
8254parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8255 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8256
8257 if (encoding != NULL) {
8258 if (parser->encoding != encoding) {
8259 parser->encoding = encoding;
8260 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8261 }
8262
8263 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8264 return true;
8265 }
8266
8267 return false;
8268}
8269
8274static void
8275parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8276 const uint8_t *cursor = parser->current.start + 1;
8277 const uint8_t *end = parser->current.end;
8278
8279 bool separator = false;
8280 while (true) {
8281 if (end - cursor <= 6) return;
8282 switch (cursor[6]) {
8283 case 'C': case 'c': cursor += 6; continue;
8284 case 'O': case 'o': cursor += 5; continue;
8285 case 'D': case 'd': cursor += 4; continue;
8286 case 'I': case 'i': cursor += 3; continue;
8287 case 'N': case 'n': cursor += 2; continue;
8288 case 'G': case 'g': cursor += 1; continue;
8289 case '=': case ':':
8290 separator = true;
8291 cursor += 6;
8292 break;
8293 default:
8294 cursor += 6;
8295 if (pm_char_is_whitespace(*cursor)) break;
8296 continue;
8297 }
8298 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8299 separator = false;
8300 }
8301
8302 while (true) {
8303 do {
8304 if (++cursor >= end) return;
8305 } while (pm_char_is_whitespace(*cursor));
8306
8307 if (separator) break;
8308 if (*cursor != '=' && *cursor != ':') return;
8309
8310 separator = true;
8311 cursor++;
8312 }
8313
8314 const uint8_t *value_start = cursor;
8315 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8316
8317 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8318 // If we were unable to parse the encoding value, then we've got an
8319 // issue because we didn't understand the encoding that the user was
8320 // trying to use. In this case we'll keep using the default encoding but
8321 // add an error to the parser to indicate an unsuccessful parse.
8322 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8323 }
8324}
8325
/**
 * The possible outcomes of interpreting the value of a magic comment as a
 * boolean.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8331
8336static pm_magic_comment_boolean_value_t
8337parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8338 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8339 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8340 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8341 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8342 } else {
8343 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8344 }
8345}
8346
/**
 * Return whether the given byte delimits the key of a magic comment: a
 * single quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8351
8357static inline const uint8_t *
8358parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8359 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8360 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8361 return cursor;
8362 }
8363 cursor++;
8364 }
8365 return NULL;
8366}
8367
8378static inline bool
8379parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8380 bool result = true;
8381
8382 const uint8_t *start = parser->current.start + 1;
8383 const uint8_t *end = parser->current.end;
8384 if (end - start <= 7) return false;
8385
8386 const uint8_t *cursor;
8387 bool indicator = false;
8388
8389 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8390 start = cursor + 3;
8391
8392 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8393 end = cursor;
8394 indicator = true;
8395 } else {
8396 // If we have a start marker but not an end marker, then we cannot
8397 // have a magic comment.
8398 return false;
8399 }
8400 }
8401
8402 cursor = start;
8403 while (cursor < end) {
8404 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8405
8406 const uint8_t *key_start = cursor;
8407 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8408
8409 const uint8_t *key_end = cursor;
8410 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8411 if (cursor == end) break;
8412
8413 if (*cursor == ':') {
8414 cursor++;
8415 } else {
8416 if (!indicator) return false;
8417 continue;
8418 }
8419
8420 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8421 if (cursor == end) break;
8422
8423 const uint8_t *value_start;
8424 const uint8_t *value_end;
8425
8426 if (*cursor == '"') {
8427 value_start = ++cursor;
8428 for (; cursor < end && *cursor != '"'; cursor++) {
8429 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8430 }
8431 value_end = cursor;
8432 if (*cursor == '"') cursor++;
8433 } else {
8434 value_start = cursor;
8435 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8436 value_end = cursor;
8437 }
8438
8439 if (indicator) {
8440 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8441 } else {
8442 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8443 if (cursor != end) return false;
8444 }
8445
8446 // Here, we need to do some processing on the key to swap out dashes for
8447 // underscores. We only need to do this if there _is_ a dash in the key.
8448 pm_string_t key;
8449 const size_t key_length = (size_t) (key_end - key_start);
8450 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8451
8452 if (dash == NULL) {
8453 pm_string_shared_init(&key, key_start, key_end);
8454 } else {
8455 uint8_t *buffer = xmalloc(key_length);
8456 if (buffer == NULL) break;
8457
8458 memcpy(buffer, key_start, key_length);
8459 buffer[dash - key_start] = '_';
8460
8461 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8462 buffer[dash - key_start] = '_';
8463 }
8464
8465 pm_string_owned_init(&key, buffer, key_length);
8466 }
8467
8468 // Finally, we can start checking the key against the list of known
8469 // magic comment keys, and potentially change state based on that.
8470 const uint8_t *key_source = pm_string_source(&key);
8471 uint32_t value_length = (uint32_t) (value_end - value_start);
8472
8473 // We only want to attempt to compare against encoding comments if it's
8474 // the first line in the file (or the second in the case of a shebang).
8475 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8476 if (
8477 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8478 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8479 ) {
8480 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8481 }
8482 }
8483
8484 if (key_length == 11) {
8485 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8486 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8487 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8488 PM_PARSER_WARN_TOKEN_FORMAT(
8489 parser,
8490 parser->current,
8491 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8492 (int) key_length,
8493 (const char *) key_source,
8494 (int) value_length,
8495 (const char *) value_start
8496 );
8497 break;
8498 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8499 parser->warn_mismatched_indentation = false;
8500 break;
8501 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8502 parser->warn_mismatched_indentation = true;
8503 break;
8504 }
8505 }
8506 } else if (key_length == 21) {
8507 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8508 // We only want to handle frozen string literal comments if it's
8509 // before any semantic tokens have been seen.
8510 if (semantic_token_seen) {
8511 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8512 } else {
8513 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8514 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8515 PM_PARSER_WARN_TOKEN_FORMAT(
8516 parser,
8517 parser->current,
8518 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8519 (int) key_length,
8520 (const char *) key_source,
8521 (int) value_length,
8522 (const char *) value_start
8523 );
8524 break;
8525 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8527 break;
8528 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8530 break;
8531 }
8532 }
8533 }
8534 } else if (key_length == 24) {
8535 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8536 const uint8_t *cursor = parser->current.start;
8537 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8538
8539 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8540 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8541 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8542 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8543 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8544 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8545 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8546 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8547 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8548 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8549 } else {
8550 PM_PARSER_WARN_TOKEN_FORMAT(
8551 parser,
8552 parser->current,
8553 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8554 (int) key_length,
8555 (const char *) key_source,
8556 (int) value_length,
8557 (const char *) value_start
8558 );
8559 }
8560 }
8561 }
8562
8563 // When we're done, we want to free the string in case we had to
8564 // allocate memory for it.
8565 pm_string_free(&key);
8566
8567 // Allocate a new magic comment node to append to the parser's list.
8569 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8570 magic_comment->key_start = key_start;
8571 magic_comment->value_start = value_start;
8572 magic_comment->key_length = (uint32_t) key_length;
8573 magic_comment->value_length = value_length;
8574 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8575 }
8576 }
8577
8578 return result;
8579}
8580
8581/******************************************************************************/
8582/* Context manipulations */
8583/******************************************************************************/
8584
/**
 * Returns true if the given token would close the given context. Each case
 * compares token->type against the token kinds that end that context;
 * PM_CONTEXT_NONE, and any context value not matched by the switch, yields
 * false.
 *
 * NOTE(review): the embedded line numbers in this listing skip in several
 * places (for example 8588 -> 8590 -> 8592 -> 8594), and some return statements
 * directly follow another return. It looks like a number of `case` labels
 * were dropped from this copy; confirm against the canonical source before
 * changing which contexts map to which terminators.
 */
8585 static bool
8586 context_terminator(pm_context_t context, pm_token_t *token) {
8587 switch (context) {
8588 case PM_CONTEXT_MAIN:
8590 case PM_CONTEXT_DEFINED:
8592 case PM_CONTEXT_TERNARY:
8594 return token->type == PM_TOKEN_EOF;
8596 return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8597 case PM_CONTEXT_PREEXE:
8598 case PM_CONTEXT_POSTEXE:
8599 return token->type == PM_TOKEN_BRACE_RIGHT;
8600 case PM_CONTEXT_MODULE:
8601 case PM_CONTEXT_CLASS:
8602 case PM_CONTEXT_SCLASS:
8604 case PM_CONTEXT_DEF:
// Bodies that may carry rescue/ensure clauses end at any of the three.
8606 return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8607 case PM_CONTEXT_WHILE:
8608 case PM_CONTEXT_UNTIL:
8609 case PM_CONTEXT_ELSE:
8610 case PM_CONTEXT_FOR:
8618 return token->type == PM_TOKEN_KEYWORD_END;
8620 return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8622 return token->type == PM_TOKEN_KEYWORD_IN;
8624 return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8625 case PM_CONTEXT_CASE_IN:
8626 return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8627 case PM_CONTEXT_IF:
8628 case PM_CONTEXT_ELSIF:
8629 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8630 case PM_CONTEXT_UNLESS:
8631 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8632 case PM_CONTEXT_EMBEXPR:
8633 return token->type == PM_TOKEN_EMBEXPR_END;
8635 return token->type == PM_TOKEN_BRACE_RIGHT;
8636 case PM_CONTEXT_PARENS:
8637 return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8638 case PM_CONTEXT_BEGIN:
8646 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8654 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8656 return token->type == PM_TOKEN_BRACE_RIGHT;
8658 return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8659 case PM_CONTEXT_NONE:
8660 return false;
8661 }
8662
// Reached only when `context` holds a value not handled by the switch.
8663 return false;
8664 }
8665
8670static pm_context_t
8671context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8672 pm_context_node_t *context_node = parser->current_context;
8673
8674 while (context_node != NULL) {
8675 if (context_terminator(context_node->context, token)) return context_node->context;
8676 context_node = context_node->prev;
8677 }
8678
8679 return PM_CONTEXT_NONE;
8680}
8681
8682static bool
8683context_push(pm_parser_t *parser, pm_context_t context) {
8684 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8685 if (context_node == NULL) return false;
8686
8687 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8688
8689 if (parser->current_context == NULL) {
8690 parser->current_context = context_node;
8691 } else {
8692 context_node->prev = parser->current_context;
8693 parser->current_context = context_node;
8694 }
8695
8696 return true;
8697}
8698
8699static void
8700context_pop(pm_parser_t *parser) {
8701 pm_context_node_t *prev = parser->current_context->prev;
8702 xfree(parser->current_context);
8703 parser->current_context = prev;
8704}
8705
8706static bool
8707context_p(const pm_parser_t *parser, pm_context_t context) {
8708 pm_context_node_t *context_node = parser->current_context;
8709
8710 while (context_node != NULL) {
8711 if (context_node->context == context) return true;
8712 context_node = context_node->prev;
8713 }
8714
8715 return false;
8716}
8717
/**
 * Walks the context stack from the parser's current context through the prev
 * links. Returns true as soon as a PM_CONTEXT_DEF node is reached, and false
 * if a class, module, or singleton class context is reached first or the
 * stack is exhausted.
 *
 * NOTE(review): the embedded line numbers skip between the case labels below
 * (8724 -> 8729, 8730 -> 8734 -> 8738 -> 8742), so additional labels were
 * probably dropped from this copy; confirm against the canonical source.
 */
8718 static bool
8719 context_def_p(const pm_parser_t *parser) {
8720 pm_context_node_t *context_node = parser->current_context;
8721
8722 while (context_node != NULL) {
8723 switch (context_node->context) {
8724 case PM_CONTEXT_DEF:
8729 return true;
8730 case PM_CONTEXT_CLASS:
8734 case PM_CONTEXT_MODULE:
8738 case PM_CONTEXT_SCLASS:
8742 return false;
8743 default:
// Any other context is transparent: keep looking further up the stack.
8744 context_node = context_node->prev;
8745 }
8746 }
8747
8748 return false;
8749 }
8750
/**
 * Returns a string literal that describes the given context in
 * human-readable form. PM_CONTEXT_NONE is not expected here: it trips an
 * assertion and, when assertions are compiled out, yields an empty string.
 *
 * NOTE(review): the embedded line numbers skip inside the 'else', 'ensure'
 * and 'rescue' groups (for example 8771 -> 8778), so some case labels that
 * share those strings were probably dropped from this copy; confirm against
 * the canonical source.
 */
8755 static const char *
8756 context_human(pm_context_t context) {
8757 switch (context) {
8758 case PM_CONTEXT_NONE:
8759 assert(false && "unreachable");
8760 return "";
8761 case PM_CONTEXT_BEGIN: return "begin statement";
8762 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8763 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8764 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8765 case PM_CONTEXT_CASE_IN: return "'in' clause";
8766 case PM_CONTEXT_CLASS: return "class definition";
8767 case PM_CONTEXT_DEF: return "method definition";
8768 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8769 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8770 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8771 case PM_CONTEXT_ELSE:
8778 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8779 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8780 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8787 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8788 case PM_CONTEXT_FOR: return "for loop";
8789 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8790 case PM_CONTEXT_IF: return "if statement";
8791 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8792 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8793 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8794 case PM_CONTEXT_MAIN: return "top level context";
8795 case PM_CONTEXT_MODULE: return "module definition";
8796 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8797 case PM_CONTEXT_PARENS: return "parentheses";
8798 case PM_CONTEXT_POSTEXE: return "'END' block";
8799 case PM_CONTEXT_PREDICATE: return "predicate";
8800 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8808 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8809 case PM_CONTEXT_SCLASS: return "singleton class definition";
8810 case PM_CONTEXT_TERNARY: return "ternary expression";
8811 case PM_CONTEXT_UNLESS: return "unless statement";
8812 case PM_CONTEXT_UNTIL: return "until statement";
8813 case PM_CONTEXT_WHILE: return "while statement";
8814 }
8815
// Reached only when `context` holds a value not handled by the switch.
8816 assert(false && "unreachable");
8817 return "";
8818 }
8819
8820/******************************************************************************/
8821/* Specific token lexers */
8822/******************************************************************************/
8823
8824static inline void
8825pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8826 if (invalid != NULL) {
8827 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8828 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8829 }
8830}
8831
8832static size_t
8833pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8834 const uint8_t *invalid = NULL;
8835 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8836 pm_strspn_number_validate(parser, string, length, invalid);
8837 return length;
8838}
8839
8840static size_t
8841pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8842 const uint8_t *invalid = NULL;
8843 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8844 pm_strspn_number_validate(parser, string, length, invalid);
8845 return length;
8846}
8847
8848static size_t
8849pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8850 const uint8_t *invalid = NULL;
8851 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8852 pm_strspn_number_validate(parser, string, length, invalid);
8853 return length;
8854}
8855
8856static size_t
8857pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8858 const uint8_t *invalid = NULL;
8859 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8860 pm_strspn_number_validate(parser, string, length, invalid);
8861 return length;
8862}
8863
8864static pm_token_type_t
8865lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8866 pm_token_type_t type = PM_TOKEN_INTEGER;
8867
8868 // Here we're going to attempt to parse the optional decimal portion of a
8869 // float. If it's not there, then it's okay and we'll just continue on.
8870 if (peek(parser) == '.') {
8871 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8872 parser->current.end += 2;
8873 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8874 type = PM_TOKEN_FLOAT;
8875 } else {
8876 // If we had a . and then something else, then it's not a float
8877 // suffix on a number it's a method call or something else.
8878 return type;
8879 }
8880 }
8881
8882 // Here we're going to attempt to parse the optional exponent portion of a
8883 // float. If it's not there, it's okay and we'll just continue on.
8884 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8885 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8886 parser->current.end += 2;
8887
8888 if (pm_char_is_decimal_digit(peek(parser))) {
8889 parser->current.end++;
8890 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8891 } else {
8892 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8893 }
8894 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8895 parser->current.end++;
8896 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8897 } else {
8898 return type;
8899 }
8900
8901 *seen_e = true;
8902 type = PM_TOKEN_FLOAT;
8903 }
8904
8905 return type;
8906}
8907
8908static pm_token_type_t
8909lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8910 pm_token_type_t type = PM_TOKEN_INTEGER;
8911 *seen_e = false;
8912
8913 if (peek_offset(parser, -1) == '0') {
8914 switch (*parser->current.end) {
8915 // 0d1111 is a decimal number
8916 case 'd':
8917 case 'D':
8918 parser->current.end++;
8919 if (pm_char_is_decimal_digit(peek(parser))) {
8920 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8921 } else {
8922 match(parser, '_');
8923 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8924 }
8925
8926 break;
8927
8928 // 0b1111 is a binary number
8929 case 'b':
8930 case 'B':
8931 parser->current.end++;
8932 if (pm_char_is_binary_digit(peek(parser))) {
8933 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8934 } else {
8935 match(parser, '_');
8936 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8937 }
8938
8939 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8940 break;
8941
8942 // 0o1111 is an octal number
8943 case 'o':
8944 case 'O':
8945 parser->current.end++;
8946 if (pm_char_is_octal_digit(peek(parser))) {
8947 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8948 } else {
8949 match(parser, '_');
8950 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8951 }
8952
8953 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8954 break;
8955
8956 // 01111 is an octal number
8957 case '_':
8958 case '0':
8959 case '1':
8960 case '2':
8961 case '3':
8962 case '4':
8963 case '5':
8964 case '6':
8965 case '7':
8966 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8967 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8968 break;
8969
8970 // 0x1111 is a hexadecimal number
8971 case 'x':
8972 case 'X':
8973 parser->current.end++;
8974 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8975 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8976 } else {
8977 match(parser, '_');
8978 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8979 }
8980
8981 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8982 break;
8983
8984 // 0.xxx is a float
8985 case '.': {
8986 type = lex_optional_float_suffix(parser, seen_e);
8987 break;
8988 }
8989
8990 // 0exxx is a float
8991 case 'e':
8992 case 'E': {
8993 type = lex_optional_float_suffix(parser, seen_e);
8994 break;
8995 }
8996 }
8997 } else {
8998 // If it didn't start with a 0, then we'll lex as far as we can into a
8999 // decimal number.
9000 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
9001
9002 // Afterward, we'll lex as far as we can into an optional float suffix.
9003 type = lex_optional_float_suffix(parser, seen_e);
9004 }
9005
9006 // At this point we have a completed number, but we want to provide the user
9007 // with a good experience if they put an additional .xxx fractional
9008 // component on the end, so we'll check for that here.
9009 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
9010 const uint8_t *fraction_start = parser->current.end;
9011 const uint8_t *fraction_end = parser->current.end + 2;
9012 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
9013 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
9014 }
9015
9016 return type;
9017}
9018
9019static pm_token_type_t
9020lex_numeric(pm_parser_t *parser) {
9021 pm_token_type_t type = PM_TOKEN_INTEGER;
9022 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
9023
9024 if (parser->current.end < parser->end) {
9025 bool seen_e = false;
9026 type = lex_numeric_prefix(parser, &seen_e);
9027
9028 const uint8_t *end = parser->current.end;
9029 pm_token_type_t suffix_type = type;
9030
9031 if (type == PM_TOKEN_INTEGER) {
9032 if (match(parser, 'r')) {
9033 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9034
9035 if (match(parser, 'i')) {
9036 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
9037 }
9038 } else if (match(parser, 'i')) {
9039 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9040 }
9041 } else {
9042 if (!seen_e && match(parser, 'r')) {
9043 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9044
9045 if (match(parser, 'i')) {
9046 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9047 }
9048 } else if (match(parser, 'i')) {
9049 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9050 }
9051 }
9052
9053 const uint8_t b = peek(parser);
9054 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9055 parser->current.end = end;
9056 } else {
9057 type = suffix_type;
9058 }
9059 }
9060
9061 return type;
9062}
9063
/**
 * Lex the part of a global variable that follows the '$', advancing
 * parser->current.end over it, and return the resulting token type.
 *
 * Returns PM_TOKEN_GLOBAL_VARIABLE in most cases. The match-related variables
 * ($&, $`, $', $+) yield PM_TOKEN_BACK_REFERENCE and $1..$9... yield
 * PM_TOKEN_NUMBERED_REFERENCE, unless the lexer is in the FNAME state, in
 * which case they are also returned as PM_TOKEN_GLOBAL_VARIABLE. Errors are
 * recorded on the parser for a bare '$', for '$0' followed by identifier
 * characters, and for a '$' followed by something unrecognized; a token type
 * is still returned in those cases.
 */
9064 static pm_token_type_t
9065 lex_global_variable(pm_parser_t *parser) {
// Nothing at all after the '$': report it and return.
9066 if (parser->current.end >= parser->end) {
9067 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9068 return PM_TOKEN_GLOBAL_VARIABLE;
9069 }
9070
9071 // True if multiple characters are allowed after the declaration of the
9072 // global variable. Not true when it starts with "$-".
9073 bool allow_multiple = true;
9074
9075 switch (*parser->current.end) {
9076 case '~': // $~: match-data
9077 case '*': // $*: argv
9078 case '$': // $$: pid
9079 case '?': // $?: last status
9080 case '!': // $!: error string
9081 case '@': // $@: error position
9082 case '/': // $/: input record separator
9083 case '\\': // $\: output record separator
9084 case ';': // $;: field separator
9085 case ',': // $,: output field separator
9086 case '.': // $.: last read line number
9087 case '=': // $=: ignorecase
9088 case ':': // $:: load path
9089 case '<': // $<: reading filename
9090 case '>': // $>: default output handle
9091 case '\"': // $": already loaded files
9092 parser->current.end++;
9093 return PM_TOKEN_GLOBAL_VARIABLE;
9094
9095 case '&': // $&: last match
9096 case '`': // $`: string before last match
9097 case '\'': // $': string after last match
9098 case '+': // $+: string matches last paren.
9099 parser->current.end++;
9100 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9101
9102 case '0': {
9103 parser->current.end++;
9104 size_t width;
9105
9106 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
// Consume the whole run of identifier characters so that the error
// below covers everything that was written after "$0".
9107 do {
9108 parser->current.end += width;
9109 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9110
9111 // $0 isn't allowed to be followed by anything.
9112 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9113 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9114 }
9115
9116 return PM_TOKEN_GLOBAL_VARIABLE;
9117 }
9118
9119 case '1':
9120 case '2':
9121 case '3':
9122 case '4':
9123 case '5':
9124 case '6':
9125 case '7':
9126 case '8':
9127 case '9':
9128 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9129 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9130
9131 case '-':
9132 parser->current.end++;
9133 allow_multiple = false;
// There is no break here: control continues into the default case, where
// allow_multiple == false limits the name to one identifier character.
// NOTE(review): the embedded line numbers skip 9134 at this point, so the
// canonical source probably carries an explicit fallthrough marker here.
9135 default: {
9136 size_t width;
9137
9138 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
9139 do {
9140 parser->current.end += width;
9141 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
9142 } else if (pm_char_is_whitespace(peek(parser))) {
9143 // If we get here, then we have a $ followed by whitespace,
9144 // which is not allowed.
9145 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9146 } else {
9147 // If we get here, then we have a $ followed by something that
9148 // isn't recognized as a global variable.
9149 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
// The error span runs from the token start through one full character
// (in the parser's encoding) past the current end; current.end itself is
// not advanced.
9150 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9151 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9152 }
9153
9154 return PM_TOKEN_GLOBAL_VARIABLE;
9155 }
9156 }
9157 }
9158
9171static inline pm_token_type_t
9172lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9173 if (memcmp(current_start, value, vlen) == 0) {
9174 pm_lex_state_t last_state = parser->lex_state;
9175
9176 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9177 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9178 } else {
9179 lex_state_set(parser, state);
9180 if (state == PM_LEX_STATE_BEG) {
9181 parser->command_start = true;
9182 }
9183
9184 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9185 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9186 return modifier_type;
9187 }
9188 }
9189
9190 return type;
9191 }
9192
9193 return PM_TOKEN_EOF;
9194}
9195
/**
 * Lex an identifier-like token. Starting from parser->current.end, consume as
 * many identifier characters as possible (plus, where allowed, a trailing
 * '!', '?', '=', or label ':'), store the new end in parser->current.end, and
 * classify the result.
 *
 * previous_command_start: when true, a trailing ':' is not treated as a label
 * marker in the LABEL/ENDFN states (it still is when lex_state_arg_p holds).
 *
 * Returns PM_TOKEN_LABEL, PM_TOKEN_METHOD_NAME, one of the keyword token
 * types, PM_TOKEN_CONSTANT, or PM_TOKEN_IDENTIFIER. Recognizing a label or a
 * keyword also updates the lexer state.
 */
9196 static pm_token_type_t
9197 lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9198 // Lex as far as we can into the current identifier.
9199 size_t width;
9200 const uint8_t *end = parser->end;
9201 const uint8_t *current_start = parser->current.start;
9202 const uint8_t *current_end = parser->current.end;
9203 bool encoding_changed = parser->encoding_changed;
9204
// When the encoding has not been changed, the UTF-8 specific scanner is used
// instead of going through the parser's encoding.
9205 if (encoding_changed) {
9206 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
9207 current_end += width;
9208 }
9209 } else {
9210 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
9211 current_end += width;
9212 }
9213 }
9214 parser->current.end = current_end;
9215
9216 // Now cache the length of the identifier so that we can quickly compare it
9217 // against known keywords.
9218 width = (size_t) (current_end - current_start);
9219
9220 if (current_end < end) {
// A trailing '!' or '?' is only taken when the byte after it is not '='.
9221 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9222 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9223 // check if we're returning the defined? keyword or just an identifier.
9224 width++;
9225
9226 if (
9227 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9228 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9229 ) {
9230 // If we're in a position where we can accept a : at the end of an
9231 // identifier, then we'll optionally accept it.
9232 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9233 (void) match(parser, ':');
9234 return PM_TOKEN_LABEL;
9235 }
9236
// Keywords are not recognized when the lexer state is exactly DOT.
9237 if (parser->lex_state != PM_LEX_STATE_DOT) {
9238 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9239 return PM_TOKEN_KEYWORD_DEFINED;
9240 }
9241 }
9242
9243 return PM_TOKEN_METHOD_NAME;
9244 }
9245
// In the FNAME state a trailing '=' is taken as part of the name, except
// when the '=' is followed by '~' or '>', or by a second '=' that is not
// itself followed by '>'.
9246 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9247 // If we're in a position where we can accept a = at the end of an
9248 // identifier, then we'll optionally accept it.
9249 return PM_TOKEN_IDENTIFIER;
9250 }
9251
9252 if (
9253 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9254 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9255 ) {
9256 // If we're in a position where we can accept a : at the end of an
9257 // identifier, then we'll optionally accept it.
9258 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9259 (void) match(parser, ':');
9260 return PM_TOKEN_LABEL;
9261 }
9262 }
9263
// Keyword recognition. Candidates are grouped by length so that only
// keywords of the identifier's exact width are compared. lex_keyword
// returns PM_TOKEN_EOF on a mismatch and updates the lexer state on a match.
9264 if (parser->lex_state != PM_LEX_STATE_DOT) {
9265 pm_token_type_t type;
9266 switch (width) {
9267 case 2:
9268 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
// "do" gets a distinct token type when pm_do_loop_stack_p reports true.
9269 if (pm_do_loop_stack_p(parser)) {
9270 return PM_TOKEN_KEYWORD_DO_LOOP;
9271 }
9272 return PM_TOKEN_KEYWORD_DO;
9273 }
9274
9275 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9276 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278 break;
9279 case 3:
9280 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 break;
9288 case 4:
9289 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 break;
9298 case 5:
9299 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9302 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9305 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9306 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9308 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9309 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9310 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9311 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9312 break;
9313 case 6:
9314 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9315 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9316 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9317 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9318 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9319 break;
9320 case 8:
9321 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9322 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9323 break;
9324 case 12:
9325 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9326 break;
9327 }
9328 }
9329
// Not a label, method name, or keyword: the result of the uppercase check
// applied at the start of the token decides between constant and identifier.
9330 if (encoding_changed) {
9331 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9332 }
9333 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9334 }
9335
9340static bool
9341current_token_starts_line(pm_parser_t *parser) {
9342 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9343}
9344
9359static pm_token_type_t
9360lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9361 // If there is no content following this #, then we're at the end of
9362 // the string and we can safely return string content.
9363 if (pound + 1 >= parser->end) {
9364 parser->current.end = pound + 1;
9365 return PM_TOKEN_STRING_CONTENT;
9366 }
9367
9368 // Now we'll check against the character that follows the #. If it constitutes
9369 // valid interpolation, we'll handle that, otherwise we'll return
9370 // PM_TOKEN_NOT_PROVIDED.
9371 switch (pound[1]) {
9372 case '@': {
9373 // In this case we may have hit an embedded instance or class variable.
9374 if (pound + 2 >= parser->end) {
9375 parser->current.end = pound + 1;
9376 return PM_TOKEN_STRING_CONTENT;
9377 }
9378
9379 // If we're looking at a @ and there's another @, then we'll skip past the
9380 // second @.
9381 const uint8_t *variable = pound + 2;
9382 if (*variable == '@' && pound + 3 < parser->end) variable++;
9383
9384 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
9385 // At this point we're sure that we've either hit an embedded instance
9386 // or class variable. In this case we'll first need to check if we've
9387 // already consumed content.
9388 if (pound > parser->current.start) {
9389 parser->current.end = pound;
9390 return PM_TOKEN_STRING_CONTENT;
9391 }
9392
9393 // Otherwise we need to return the embedded variable token
9394 // and then switch to the embedded variable lex mode.
9395 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9396 parser->current.end = pound + 1;
9397 return PM_TOKEN_EMBVAR;
9398 }
9399
9400 // If we didn't get a valid interpolation, then this is just regular
9401 // string content. This is like if we get "#@-". In this case the caller
9402 // should keep lexing.
9403 parser->current.end = pound + 1;
9404 return PM_TOKEN_NOT_PROVIDED;
9405 }
9406 case '$':
9407 // In this case we may have hit an embedded global variable. If there's
9408 // not enough room, then we'll just return string content.
9409 if (pound + 2 >= parser->end) {
9410 parser->current.end = pound + 1;
9411 return PM_TOKEN_STRING_CONTENT;
9412 }
9413
9414 // This is the character that we're going to check to see if it is the
9415 // start of an identifier that would indicate that this is a global
9416 // variable.
9417 const uint8_t *check = pound + 2;
9418
9419 if (pound[2] == '-') {
9420 if (pound + 3 >= parser->end) {
9421 parser->current.end = pound + 2;
9422 return PM_TOKEN_STRING_CONTENT;
9423 }
9424
9425 check++;
9426 }
9427
9428 // If the character that we're going to check is the start of an
9429 // identifier, or we don't have a - and the character is a decimal number
9430 // or a global name punctuation character, then we've hit an embedded
9431 // global variable.
9432 if (
9433 char_is_identifier_start(parser, check, parser->end - check) ||
9434 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9435 ) {
9436 // In this case we've hit an embedded global variable. First check to
9437 // see if we've already consumed content. If we have, then we need to
9438 // return that content as string content first.
9439 if (pound > parser->current.start) {
9440 parser->current.end = pound;
9441 return PM_TOKEN_STRING_CONTENT;
9442 }
9443
9444 // Otherwise, we need to return the embedded variable token and switch
9445 // to the embedded variable lex mode.
9446 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9447 parser->current.end = pound + 1;
9448 return PM_TOKEN_EMBVAR;
9449 }
9450
9451 // In this case we've hit a #$ that does not indicate a global variable.
9452 // In this case we'll continue lexing past it.
9453 parser->current.end = pound + 1;
9454 return PM_TOKEN_NOT_PROVIDED;
9455 case '{':
9456 // In this case it's the start of an embedded expression. If we have
9457 // already consumed content, then we need to return that content as string
9458 // content first.
9459 if (pound > parser->current.start) {
9460 parser->current.end = pound;
9461 return PM_TOKEN_STRING_CONTENT;
9462 }
9463
9464 parser->enclosure_nesting++;
9465
9466 // Otherwise we'll skip past the #{ and begin lexing the embedded
9467 // expression.
9468 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9469 parser->current.end = pound + 2;
9470 parser->command_start = true;
9471 pm_do_loop_stack_push(parser, false);
9472 return PM_TOKEN_EMBEXPR_BEGIN;
9473 default:
9474 // In this case we've hit a # that doesn't constitute interpolation. We'll
9475 // mark that by returning the not provided token type. This tells the
9476 // consumer to keep lexing forward.
9477 parser->current.end = pound + 1;
9478 return PM_TOKEN_NOT_PROVIDED;
9479 }
9480}
9481
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with | and passed around as a single uint8_t.

// No special context.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

// The escape is nested inside a control escape (\c or \C-).
static const uint8_t PM_ESCAPE_FLAG_CONTROL = (uint8_t) (1u << 0);

// The escape is nested inside a meta escape (\M-).
static const uint8_t PM_ESCAPE_FLAG_META = (uint8_t) (1u << 1);

// The escape is being read for a character literal (?\x).
static const uint8_t PM_ESCAPE_FLAG_SINGLE = (uint8_t) (1u << 2);

// The escape is being read inside a regular expression, so the regular
// expression buffer has to be written as well.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = (uint8_t) (1u << 3);
9487
/**
 * Lookup table, indexed by byte value, of the ASCII characters that are
 * accepted as the target of a control or meta escape. Note that the whitespace
 * controls 0x09-0x0D are included, while the backslash (0x5C) and DEL (0x7F)
 * are not.
 */
static const bool ascii_printable_chars[0x80] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is marked in ascii_printable_chars. Bytes
 * outside of the ASCII range are never considered printable.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) return false;
    return ascii_printable_chars[b];
}
9506
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * caller is expected to have validated that the byte is a hexadecimal digit;
 * no validation happens here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // '0'..'9' map directly onto 0..9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both 'a'..'f' and 'A'..'F' the low three bits are 1..6, so adding 9
    // yields 10..15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
9515
9521static inline uint32_t
9522escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9523 uint32_t value = 0;
9524 for (size_t index = 0; index < length; index++) {
9525 if (index != 0) value <<= 4;
9526 value |= escape_hexadecimal_digit(string[index]);
9527 }
9528
9529 // Here we're going to verify that the value is actually a valid Unicode
9530 // codepoint and not a surrogate pair.
9531 if (value >= 0xD800 && value <= 0xDFFF) {
9532 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9533 return 0xFFFD;
9534 }
9535
9536 return value;
9537}
9538
9542static inline uint8_t
9543escape_byte(uint8_t value, const uint8_t flags) {
9544 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9545 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9546 return value;
9547}
9548
9552static inline void
9553escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9554 // \u escape sequences in string-like structures implicitly change the
9555 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9556 // literal.
9557 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9558 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9559 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9560 }
9561
9563 }
9564
9565 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
9566 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9567 pm_buffer_append_byte(buffer, 0xEF);
9568 pm_buffer_append_byte(buffer, 0xBF);
9569 pm_buffer_append_byte(buffer, 0xBD);
9570 }
9571}
9572
9577static inline void
9578escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9579 if (byte >= 0x80) {
9580 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9581 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9582 }
9583
9584 parser->explicit_encoding = parser->encoding;
9585 }
9586
9587 pm_buffer_append_byte(buffer, byte);
9588}
9589
9605static inline void
9606escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9607 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9608 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9609 }
9610
9611 escape_write_byte_encoded(parser, buffer, byte);
9612}
9613
9617static inline void
9618escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9619 size_t width;
9620 if (parser->encoding_changed) {
9621 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9622 } else {
9623 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9624 }
9625
9626 if (width == 1) {
9627 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
9628 } else if (width > 1) {
9629 // Valid multibyte character. Just ignore escape.
9630 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
9631 pm_buffer_append_bytes(b, parser->current.end, width);
9632 parser->current.end += width;
9633 } else {
9634 // Assume the next character wasn't meant to be part of this escape
9635 // sequence since it is invalid. Add an error and move on.
9636 parser->current.end++;
9637 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9638 }
9639}
9640
9646static void
9647escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9648#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9649
9650 PM_PARSER_WARN_TOKEN_FORMAT(
9651 parser,
9652 parser->current,
9653 PM_WARN_INVALID_CHARACTER,
9654 FLAG(flags),
9655 FLAG(flag),
9656 type
9657 );
9658
9659#undef FLAG
9660}
9661
9665static void
9666escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9667 uint8_t peeked = peek(parser);
9668 switch (peeked) {
9669 case '\\': {
9670 parser->current.end++;
9671 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9672 return;
9673 }
9674 case '\'': {
9675 parser->current.end++;
9676 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9677 return;
9678 }
9679 case 'a': {
9680 parser->current.end++;
9681 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9682 return;
9683 }
9684 case 'b': {
9685 parser->current.end++;
9686 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9687 return;
9688 }
9689 case 'e': {
9690 parser->current.end++;
9691 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9692 return;
9693 }
9694 case 'f': {
9695 parser->current.end++;
9696 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9697 return;
9698 }
9699 case 'n': {
9700 parser->current.end++;
9701 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9702 return;
9703 }
9704 case 'r': {
9705 parser->current.end++;
9706 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9707 return;
9708 }
9709 case 's': {
9710 parser->current.end++;
9711 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9712 return;
9713 }
9714 case 't': {
9715 parser->current.end++;
9716 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9717 return;
9718 }
9719 case 'v': {
9720 parser->current.end++;
9721 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9722 return;
9723 }
9724 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9725 uint8_t value = (uint8_t) (*parser->current.end - '0');
9726 parser->current.end++;
9727
9728 if (pm_char_is_octal_digit(peek(parser))) {
9729 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9730 parser->current.end++;
9731
9732 if (pm_char_is_octal_digit(peek(parser))) {
9733 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9734 parser->current.end++;
9735 }
9736 }
9737
9738 value = escape_byte(value, flags);
9739 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9740 return;
9741 }
9742 case 'x': {
9743 const uint8_t *start = parser->current.end - 1;
9744
9745 parser->current.end++;
9746 uint8_t byte = peek(parser);
9747
9748 if (pm_char_is_hexadecimal_digit(byte)) {
9749 uint8_t value = escape_hexadecimal_digit(byte);
9750 parser->current.end++;
9751
9752 byte = peek(parser);
9753 if (pm_char_is_hexadecimal_digit(byte)) {
9754 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9755 parser->current.end++;
9756 }
9757
9758 value = escape_byte(value, flags);
9759 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9760 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9761 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9762 } else {
9763 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9764 }
9765 }
9766
9767 escape_write_byte_encoded(parser, buffer, value);
9768 } else {
9769 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9770 }
9771
9772 return;
9773 }
9774 case 'u': {
9775 const uint8_t *start = parser->current.end - 1;
9776 parser->current.end++;
9777
9778 if (parser->current.end == parser->end) {
9779 const uint8_t *start = parser->current.end - 2;
9780 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9781 } else if (peek(parser) == '{') {
9782 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9783 parser->current.end++;
9784
9785 size_t whitespace;
9786 while (true) {
9787 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9788 parser->current.end += whitespace;
9789 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9790 // This is super hacky, but it gets us nicer error
9791 // messages because we can still pass it off to the
9792 // regular expression engine even if we hit an
9793 // unterminated regular expression.
9794 parser->current.end += 2;
9795 } else {
9796 break;
9797 }
9798 }
9799
9800 const uint8_t *extra_codepoints_start = NULL;
9801 int codepoints_count = 0;
9802
9803 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9804 const uint8_t *unicode_start = parser->current.end;
9805 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9806
9807 if (hexadecimal_length > 6) {
9808 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9809 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9810 } else if (hexadecimal_length == 0) {
9811 // there are not hexadecimal characters
9812
9813 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9814 // If this is a regular expression, we are going to
9815 // let the regular expression engine handle this
9816 // error instead of us.
9817 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9818 } else {
9819 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9820 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9821 }
9822
9823 return;
9824 }
9825
9826 parser->current.end += hexadecimal_length;
9827 codepoints_count++;
9828 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9829 extra_codepoints_start = unicode_start;
9830 }
9831
9832 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9833 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9834
9835 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9836 }
9837
9838 // ?\u{nnnn} character literal should contain only one codepoint
9839 // and cannot be like ?\u{nnnn mmmm}.
9840 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9841 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9842 }
9843
9844 if (parser->current.end == parser->end) {
9845 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9846 } else if (peek(parser) == '}') {
9847 parser->current.end++;
9848 } else {
9849 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9850 // If this is a regular expression, we are going to let
9851 // the regular expression engine handle this error
9852 // instead of us.
9853 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9854 } else {
9855 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9856 }
9857 }
9858
9859 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9860 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9861 }
9862 } else {
9863 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9864
9865 if (length == 0) {
9866 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9867 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9868 } else {
9869 const uint8_t *start = parser->current.end - 2;
9870 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9871 }
9872 } else if (length == 4) {
9873 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9874
9875 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9876 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9877 }
9878
9879 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9880 parser->current.end += 4;
9881 } else {
9882 parser->current.end += length;
9883
9884 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9885 // If this is a regular expression, we are going to let
9886 // the regular expression engine handle this error
9887 // instead of us.
9888 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9889 } else {
9890 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9891 }
9892 }
9893 }
9894
9895 return;
9896 }
9897 case 'c': {
9898 parser->current.end++;
9899 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9900 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9901 }
9902
9903 if (parser->current.end == parser->end) {
9904 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9905 return;
9906 }
9907
9908 uint8_t peeked = peek(parser);
9909 switch (peeked) {
9910 case '?': {
9911 parser->current.end++;
9912 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9913 return;
9914 }
9915 case '\\':
9916 parser->current.end++;
9917
9918 if (match(parser, 'u') || match(parser, 'U')) {
9919 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9920 return;
9921 }
9922
9923 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9924 return;
9925 case ' ':
9926 parser->current.end++;
9927 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9928 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9929 return;
9930 case '\t':
9931 parser->current.end++;
9932 escape_read_warn(parser, flags, 0, "\\t");
9933 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9934 return;
9935 default: {
9936 if (!char_is_ascii_printable(peeked)) {
9937 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9938 return;
9939 }
9940
9941 parser->current.end++;
9942 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9943 return;
9944 }
9945 }
9946 }
9947 case 'C': {
9948 parser->current.end++;
9949 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9950 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9951 }
9952
9953 if (peek(parser) != '-') {
9954 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9955 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9956 return;
9957 }
9958
9959 parser->current.end++;
9960 if (parser->current.end == parser->end) {
9961 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9962 return;
9963 }
9964
9965 uint8_t peeked = peek(parser);
9966 switch (peeked) {
9967 case '?': {
9968 parser->current.end++;
9969 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9970 return;
9971 }
9972 case '\\':
9973 parser->current.end++;
9974
9975 if (match(parser, 'u') || match(parser, 'U')) {
9976 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9977 return;
9978 }
9979
9980 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9981 return;
9982 case ' ':
9983 parser->current.end++;
9984 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9985 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9986 return;
9987 case '\t':
9988 parser->current.end++;
9989 escape_read_warn(parser, flags, 0, "\\t");
9990 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9991 return;
9992 default: {
9993 if (!char_is_ascii_printable(peeked)) {
9994 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9995 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9996 return;
9997 }
9998
9999 parser->current.end++;
10000 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
10001 return;
10002 }
10003 }
10004 }
10005 case 'M': {
10006 parser->current.end++;
10007 if (flags & PM_ESCAPE_FLAG_META) {
10008 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
10009 }
10010
10011 if (peek(parser) != '-') {
10012 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10013 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10014 return;
10015 }
10016
10017 parser->current.end++;
10018 if (parser->current.end == parser->end) {
10019 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10020 return;
10021 }
10022
10023 uint8_t peeked = peek(parser);
10024 switch (peeked) {
10025 case '\\':
10026 parser->current.end++;
10027
10028 if (match(parser, 'u') || match(parser, 'U')) {
10029 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10030 return;
10031 }
10032
10033 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10034 return;
10035 case ' ':
10036 parser->current.end++;
10037 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10039 return;
10040 case '\t':
10041 parser->current.end++;
10042 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10043 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10044 return;
10045 default:
10046 if (!char_is_ascii_printable(peeked)) {
10047 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10048 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10049 return;
10050 }
10051
10052 parser->current.end++;
10053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10054 return;
10055 }
10056 }
10057 case '\r': {
10058 if (peek_offset(parser, 1) == '\n') {
10059 parser->current.end += 2;
10060 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10061 return;
10062 }
10064 }
10065 default: {
10066 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
10067 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10068 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10069 return;
10070 }
10071 if (parser->current.end < parser->end) {
10072 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
10073 } else {
10074 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10075 }
10076 return;
10077 }
10078 }
10079}
10080
10106static pm_token_type_t
10107lex_question_mark(pm_parser_t *parser) {
10108 if (lex_state_end_p(parser)) {
10109 lex_state_set(parser, PM_LEX_STATE_BEG);
10110 return PM_TOKEN_QUESTION_MARK;
10111 }
10112
10113 if (parser->current.end >= parser->end) {
10114 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10115 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10116 return PM_TOKEN_CHARACTER_LITERAL;
10117 }
10118
10119 if (pm_char_is_whitespace(*parser->current.end)) {
10120 lex_state_set(parser, PM_LEX_STATE_BEG);
10121 return PM_TOKEN_QUESTION_MARK;
10122 }
10123
10124 lex_state_set(parser, PM_LEX_STATE_BEG);
10125
10126 if (match(parser, '\\')) {
10127 lex_state_set(parser, PM_LEX_STATE_END);
10128
10129 pm_buffer_t buffer;
10130 pm_buffer_init_capacity(&buffer, 3);
10131
10132 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10133 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10134
10135 return PM_TOKEN_CHARACTER_LITERAL;
10136 } else {
10137 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10138
10139 // Ternary operators can have a ? immediately followed by an identifier
10140 // which starts with an underscore. We check for this case here.
10141 if (
10142 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10143 (
10144 (parser->current.end + encoding_width >= parser->end) ||
10145 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
10146 )
10147 ) {
10148 lex_state_set(parser, PM_LEX_STATE_END);
10149 parser->current.end += encoding_width;
10150 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10151 return PM_TOKEN_CHARACTER_LITERAL;
10152 }
10153 }
10154
10155 return PM_TOKEN_QUESTION_MARK;
10156}
10157
10162static pm_token_type_t
10163lex_at_variable(pm_parser_t *parser) {
10164 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
10165 const uint8_t *end = parser->end;
10166
10167 size_t width;
10168 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
10169 parser->current.end += width;
10170
10171 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
10172 parser->current.end += width;
10173 }
10174 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
10175 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10176 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10177 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10178 }
10179
10180 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
10181 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10182 } else {
10183 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10184 pm_parser_err_token(parser, &parser->current, diag_id);
10185 }
10186
10187 // If we're lexing an embedded variable, then we need to pop back into the
10188 // parent lex context.
10189 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10190 lex_mode_pop(parser);
10191 }
10192
10193 return type;
10194}
10195
10199static inline void
10200parser_lex_callback(pm_parser_t *parser) {
10201 if (parser->lex_callback) {
10202 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10203 }
10204}
10205
10209static inline pm_comment_t *
10210parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10211 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10212 if (comment == NULL) return NULL;
10213
10214 *comment = (pm_comment_t) {
10215 .type = type,
10216 .location = { parser->current.start, parser->current.end }
10217 };
10218
10219 return comment;
10220}
10221
10227static pm_token_type_t
10228lex_embdoc(pm_parser_t *parser) {
10229 // First, lex out the EMBDOC_BEGIN token.
10230 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10231
10232 if (newline == NULL) {
10233 parser->current.end = parser->end;
10234 } else {
10235 pm_newline_list_append(&parser->newline_list, newline);
10236 parser->current.end = newline + 1;
10237 }
10238
10239 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10240 parser_lex_callback(parser);
10241
10242 // Now, create a comment that is going to be attached to the parser.
10243 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10244 if (comment == NULL) return PM_TOKEN_EOF;
10245
10246 // Now, loop until we find the end of the embedded documentation or the end
10247 // of the file.
10248 while (parser->current.end + 4 <= parser->end) {
10249 parser->current.start = parser->current.end;
10250
10251 // If we've hit the end of the embedded documentation then we'll return
10252 // that token here.
10253 if (
10254 (memcmp(parser->current.end, "=end", 4) == 0) &&
10255 (
10256 (parser->current.end + 4 == parser->end) || // end of file
10257 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10258 (parser->current.end[4] == '\0') || // NUL or end of script
10259 (parser->current.end[4] == '\004') || // ^D
10260 (parser->current.end[4] == '\032') // ^Z
10261 )
10262 ) {
10263 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10264
10265 if (newline == NULL) {
10266 parser->current.end = parser->end;
10267 } else {
10268 pm_newline_list_append(&parser->newline_list, newline);
10269 parser->current.end = newline + 1;
10270 }
10271
10272 parser->current.type = PM_TOKEN_EMBDOC_END;
10273 parser_lex_callback(parser);
10274
10275 comment->location.end = parser->current.end;
10276 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10277
10278 return PM_TOKEN_EMBDOC_END;
10279 }
10280
10281 // Otherwise, we'll parse until the end of the line and return a line of
10282 // embedded documentation.
10283 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10284
10285 if (newline == NULL) {
10286 parser->current.end = parser->end;
10287 } else {
10288 pm_newline_list_append(&parser->newline_list, newline);
10289 parser->current.end = newline + 1;
10290 }
10291
10292 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10293 parser_lex_callback(parser);
10294 }
10295
10296 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10297
10298 comment->location.end = parser->current.end;
10299 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10300
10301 return PM_TOKEN_EOF;
10302}
10303
10309static inline void
10310parser_lex_ignored_newline(pm_parser_t *parser) {
10311 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10312 parser_lex_callback(parser);
10313}
10314
10324static inline void
10325parser_flush_heredoc_end(pm_parser_t *parser) {
10326 assert(parser->heredoc_end <= parser->end);
10327 parser->next_start = parser->heredoc_end;
10328 parser->heredoc_end = NULL;
10329}
10330
10334static bool
10335parser_end_of_line_p(const pm_parser_t *parser) {
10336 const uint8_t *cursor = parser->current.end;
10337
10338 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10339 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10340 }
10341
10342 return true;
10343}
10344
10363typedef struct {
10369
10374 const uint8_t *cursor;
10376
10396
10400static inline void
10401pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10402 pm_buffer_append_byte(&token_buffer->buffer, byte);
10403}
10404
10405static inline void
10406pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10407 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10408}
10409
10413static inline size_t
10414parser_char_width(const pm_parser_t *parser) {
10415 size_t width;
10416 if (parser->encoding_changed) {
10417 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10418 } else {
10419 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10420 }
10421
10422 // TODO: If the character is invalid in the given encoding, then we'll just
10423 // push one byte into the buffer. This should actually be an error.
10424 return (width == 0 ? 1 : width);
10425}
10426
10430static void
10431pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10432 size_t width = parser_char_width(parser);
10433 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10434 parser->current.end += width;
10435}
10436
10437static void
10438pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10439 size_t width = parser_char_width(parser);
10440 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10441 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10442 parser->current.end += width;
10443}
10444
/**
 * Returns true if none of the first `length` bytes of `value` has its high
 * bit set, i.e. every byte is in the 7-bit ASCII range. An empty slice is
 * considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        // Bytes 0x80 and above are outside of the ASCII range.
        if (*value++ >= 0x80) {
            return false;
        }
    }

    return true;
}
10453
10460static inline void
10461pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10462 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10463}
10464
10465static inline void
10466pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10467 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10468 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10469 pm_buffer_free(&token_buffer->regexp_buffer);
10470}
10471
10481static void
10482pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10483 if (token_buffer->cursor == NULL) {
10484 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10485 } else {
10486 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10487 pm_token_buffer_copy(parser, token_buffer);
10488 }
10489}
10490
10491static void
10492pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10493 if (token_buffer->base.cursor == NULL) {
10494 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10495 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10496 } else {
10497 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10498 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10499 pm_regexp_token_buffer_copy(parser, token_buffer);
10500 }
10501}
10502
10503#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10504
10513static void
10514pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10515 const uint8_t *start;
10516 if (token_buffer->cursor == NULL) {
10517 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10518 start = parser->current.start;
10519 } else {
10520 start = token_buffer->cursor;
10521 }
10522
10523 const uint8_t *end = parser->current.end - 1;
10524 assert(end >= start);
10525 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10526
10527 token_buffer->cursor = end;
10528}
10529
10530static void
10531pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10532 const uint8_t *start;
10533 if (token_buffer->base.cursor == NULL) {
10534 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10535 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10536 start = parser->current.start;
10537 } else {
10538 start = token_buffer->base.cursor;
10539 }
10540
10541 const uint8_t *end = parser->current.end - 1;
10542 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10543 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10544
10545 token_buffer->base.cursor = end;
10546}
10547
10548#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10549
10554static inline size_t
10555pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10556 size_t whitespace = 0;
10557
10558 switch (indent) {
10559 case PM_HEREDOC_INDENT_NONE:
10560 // Do nothing, we can't match a terminator with
10561 // indentation and there's no need to calculate common
10562 // whitespace.
10563 break;
10564 case PM_HEREDOC_INDENT_DASH:
10565 // Skip past inline whitespace.
10566 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10567 break;
10568 case PM_HEREDOC_INDENT_TILDE:
10569 // Skip past inline whitespace and calculate common
10570 // whitespace.
10571 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10572 if (**cursor == '\t') {
10573 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10574 } else {
10575 whitespace++;
10576 }
10577 (*cursor)++;
10578 }
10579
10580 break;
10581 }
10582
10583 return whitespace;
10584}
10585
10590static uint8_t
10591pm_lex_percent_delimiter(pm_parser_t *parser) {
10592 size_t eol_length = match_eol(parser);
10593
10594 if (eol_length) {
10595 if (parser->heredoc_end) {
10596 // If we have already lexed a heredoc, then the newline has already
10597 // been added to the list. In this case we want to just flush the
10598 // heredoc end.
10599 parser_flush_heredoc_end(parser);
10600 } else {
10601 // Otherwise, we'll add the newline to the list of newlines.
10602 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10603 }
10604
10605 uint8_t delimiter = *parser->current.end;
10606
10607 // If our delimiter is \r\n, we want to treat it as if it's \n.
10608 // For example, %\r\nfoo\r\n should be "foo"
10609 if (eol_length == 2) {
10610 delimiter = *(parser->current.end + 1);
10611 }
10612
10613 parser->current.end += eol_length;
10614 return delimiter;
10615 }
10616
10617 return *parser->current.end++;
10618}
10619
/**
 * Finish lexing the current token: set its type to the given token type,
 * invoke the lex callback, and return from the enclosing function.
 *
 * This expands to several statements that end in a bare `return`, so it must
 * be used as a statement of its own (followed by a semicolon) inside a
 * braced block or case body of a function returning void, with a variable
 * named `parser` in scope. The argument is parenthesized in the expansion so
 * that any expression can be passed safely.
 */
#define LEX(token_type) parser->current.type = (token_type); parser_lex_callback(parser); return
10625
10632static void
10633parser_lex(pm_parser_t *parser) {
10634 assert(parser->current.end <= parser->end);
10635 parser->previous = parser->current;
10636
10637 // This value mirrors cmd_state from CRuby.
10638 bool previous_command_start = parser->command_start;
10639 parser->command_start = false;
10640
10641 // This is used to communicate to the newline lexing function that we've
10642 // already seen a comment.
10643 bool lexed_comment = false;
10644
10645 // Here we cache the current value of the semantic token seen flag. This is
10646 // used to reset it in case we find a token that shouldn't flip this flag.
10647 unsigned int semantic_token_seen = parser->semantic_token_seen;
10648 parser->semantic_token_seen = true;
10649
10650 switch (parser->lex_modes.current->mode) {
10651 case PM_LEX_DEFAULT:
10652 case PM_LEX_EMBEXPR:
10653 case PM_LEX_EMBVAR:
10654
10655 // We have a specific named label here because we are going to jump back to
10656 // this location in the event that we have lexed a token that should not be
10657 // returned to the parser. This includes comments, ignored newlines, and
10658 // invalid tokens of some form.
10659 lex_next_token: {
10660 // If we have the special next_start pointer set, then we're going to jump
10661 // to that location and start lexing from there.
10662 if (parser->next_start != NULL) {
10663 parser->current.end = parser->next_start;
10664 parser->next_start = NULL;
10665 }
10666
10667 // This value mirrors space_seen from CRuby. It tracks whether or not
10668 // space has been eaten before the start of the next token.
10669 bool space_seen = false;
10670
10671 // First, we're going to skip past any whitespace at the front of the next
10672 // token.
10673 bool chomping = true;
10674 while (parser->current.end < parser->end && chomping) {
10675 switch (*parser->current.end) {
10676 case ' ':
10677 case '\t':
10678 case '\f':
10679 case '\v':
10680 parser->current.end++;
10681 space_seen = true;
10682 break;
10683 case '\r':
10684 if (match_eol_offset(parser, 1)) {
10685 chomping = false;
10686 } else {
10687 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10688 parser->current.end++;
10689 space_seen = true;
10690 }
10691 break;
10692 case '\\': {
10693 size_t eol_length = match_eol_offset(parser, 1);
10694 if (eol_length) {
10695 if (parser->heredoc_end) {
10696 parser->current.end = parser->heredoc_end;
10697 parser->heredoc_end = NULL;
10698 } else {
10699 parser->current.end += eol_length + 1;
10700 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10701 space_seen = true;
10702 }
10703 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10704 parser->current.end += 2;
10705 } else {
10706 chomping = false;
10707 }
10708
10709 break;
10710 }
10711 default:
10712 chomping = false;
10713 break;
10714 }
10715 }
10716
10717 // Next, we'll set the start of this token to be the current end.
10718 parser->current.start = parser->current.end;
10719
10720 // We'll check if we're at the end of the file. If we are, then we
10721 // need to return the EOF token.
10722 if (parser->current.end >= parser->end) {
10723 // If we hit EOF, but the EOF came immediately after a newline,
10724 // set the start of the token to the newline. This way any EOF
10725 // errors will be reported as happening on that line rather than
10726 // a line after. For example "foo(\n" should report an error
10727 // on line 1 even though EOF technically occurs on line 2.
10728 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10729 parser->current.start -= 1;
10730 }
10731 LEX(PM_TOKEN_EOF);
10732 }
10733
10734 // Finally, we'll check the current character to determine the next
10735 // token.
10736 switch (*parser->current.end++) {
10737 case '\0': // NUL or end of script
10738 case '\004': // ^D
10739 case '\032': // ^Z
10740 parser->current.end--;
10741 LEX(PM_TOKEN_EOF);
10742
10743 case '#': { // comments
10744 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10745 parser->current.end = ending == NULL ? parser->end : ending;
10746
10747 // If we found a comment while lexing, then we're going to
10748 // add it to the list of comments in the file and keep
10749 // lexing.
10750 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10751 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10752
10753 if (ending) parser->current.end++;
10754 parser->current.type = PM_TOKEN_COMMENT;
10755 parser_lex_callback(parser);
10756
10757 // Here, parse the comment to see if it's a magic comment
10758 // and potentially change state on the parser.
10759 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10760 ptrdiff_t length = parser->current.end - parser->current.start;
10761
10762 // If we didn't find a magic comment within the first
10763 // pass and we're at the start of the file, then we need
10764 // to do another pass to potentially find other patterns
10765 // for encoding comments.
10766 if (length >= 10 && !parser->encoding_locked) {
10767 parser_lex_magic_comment_encoding(parser);
10768 }
10769 }
10770
10771 lexed_comment = true;
10772 }
10774 case '\r':
10775 case '\n': {
10776 parser->semantic_token_seen = semantic_token_seen & 0x1;
10777 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10778
10779 if (eol_length) {
10780 // The only way you can have carriage returns in this
10781 // particular loop is if you have a carriage return
10782 // followed by a newline. In that case we'll just skip
10783 // over the carriage return and continue lexing, in
10784 // order to make it so that the newline token
10785 // encapsulates both the carriage return and the
10786 // newline. Note that we need to check that we haven't
10787 // already lexed a comment here because that falls
10788 // through into here as well.
10789 if (!lexed_comment) {
10790 parser->current.end += eol_length - 1; // skip CR
10791 }
10792
10793 if (parser->heredoc_end == NULL) {
10794 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10795 }
10796 }
10797
10798 if (parser->heredoc_end) {
10799 parser_flush_heredoc_end(parser);
10800 }
10801
10802 // If this is an ignored newline, then we can continue lexing after
10803 // calling the callback with the ignored newline token.
10804 switch (lex_state_ignored_p(parser)) {
10805 case PM_IGNORED_NEWLINE_NONE:
10806 break;
10807 case PM_IGNORED_NEWLINE_PATTERN:
10808 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10809 if (!lexed_comment) parser_lex_ignored_newline(parser);
10810 lex_state_set(parser, PM_LEX_STATE_BEG);
10811 parser->command_start = true;
10812 parser->current.type = PM_TOKEN_NEWLINE;
10813 return;
10814 }
10816 case PM_IGNORED_NEWLINE_ALL:
10817 if (!lexed_comment) parser_lex_ignored_newline(parser);
10818 lexed_comment = false;
10819 goto lex_next_token;
10820 }
10821
10822 // Here we need to look ahead and see if there is a call operator
10823 // (either . or &.) that starts the next line. If there is, then this
10824 // is going to become an ignored newline and we're going to instead
10825 // return the call operator.
10826 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10827 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10828
10829 if (next_content < parser->end) {
10830 // If we hit a comment after a newline, then we're going to check
10831 // if it's ignored or if it's followed by a method call ('.').
10832 // If it is, then we're going to call the
10833 // callback with an ignored newline and then continue lexing.
10834 // Otherwise we'll return a regular newline.
10835 if (next_content[0] == '#') {
10836 // Here we look for a "." or "&." following a "\n".
10837 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10838
10839 while (following && (following + 1 < parser->end)) {
10840 following++;
10841 following += pm_strspn_inline_whitespace(following, parser->end - following);
10842
10843 // If this is not followed by a comment, then we can break out
10844 // of this loop.
10845 if (peek_at(parser, following) != '#') break;
10846
10847 // If there is a comment, then we need to find the end of the
10848 // comment and continue searching from there.
10849 following = next_newline(following, parser->end - following);
10850 }
10851
10852 // If the lex state was ignored, or we hit a '.' or a '&.',
10853 // we will lex the ignored newline
10854 if (
10855 lex_state_ignored_p(parser) ||
10856 (following && (
10857 (peek_at(parser, following) == '.') ||
10858 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10859 ))
10860 ) {
10861 if (!lexed_comment) parser_lex_ignored_newline(parser);
10862 lexed_comment = false;
10863 goto lex_next_token;
10864 }
10865 }
10866
10867 // If we hit a . after a newline, then we're in a call chain and
10868 // we need to return the call operator.
10869 if (next_content[0] == '.') {
10870 // To match ripper, we need to emit an ignored newline even though
10871 // it's a real newline in the case that we have a beginless range
10872 // on a subsequent line.
10873 if (peek_at(parser, next_content + 1) == '.') {
10874 if (!lexed_comment) parser_lex_ignored_newline(parser);
10875 lex_state_set(parser, PM_LEX_STATE_BEG);
10876 parser->command_start = true;
10877 parser->current.type = PM_TOKEN_NEWLINE;
10878 return;
10879 }
10880
10881 if (!lexed_comment) parser_lex_ignored_newline(parser);
10882 lex_state_set(parser, PM_LEX_STATE_DOT);
10883 parser->current.start = next_content;
10884 parser->current.end = next_content + 1;
10885 parser->next_start = NULL;
10886 LEX(PM_TOKEN_DOT);
10887 }
10888
10889 // If we hit a &. after a newline, then we're in a call chain and
10890 // we need to return the call operator.
10891 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10892 if (!lexed_comment) parser_lex_ignored_newline(parser);
10893 lex_state_set(parser, PM_LEX_STATE_DOT);
10894 parser->current.start = next_content;
10895 parser->current.end = next_content + 2;
10896 parser->next_start = NULL;
10897 LEX(PM_TOKEN_AMPERSAND_DOT);
10898 }
10899 }
10900
10901 // At this point we know this is a regular newline, and we can set the
10902 // necessary state and return the token.
10903 lex_state_set(parser, PM_LEX_STATE_BEG);
10904 parser->command_start = true;
10905 parser->current.type = PM_TOKEN_NEWLINE;
10906 if (!lexed_comment) parser_lex_callback(parser);
10907 return;
10908 }
10909
10910 // ,
10911 case ',':
10912 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10913 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10914 }
10915
10916 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10917 LEX(PM_TOKEN_COMMA);
10918
10919 // (
10920 case '(': {
10921 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10922
10923 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10924 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10925 }
10926
10927 parser->enclosure_nesting++;
10928 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10929 pm_do_loop_stack_push(parser, false);
10930 LEX(type);
10931 }
10932
10933 // )
10934 case ')':
10935 parser->enclosure_nesting--;
10936 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10937 pm_do_loop_stack_pop(parser);
10938 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10939
10940 // ;
10941 case ';':
10942 lex_state_set(parser, PM_LEX_STATE_BEG);
10943 parser->command_start = true;
10944 LEX(PM_TOKEN_SEMICOLON);
10945
10946 // [ [] []=
10947 case '[':
10948 parser->enclosure_nesting++;
10949 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10950
10951 if (lex_state_operator_p(parser)) {
10952 if (match(parser, ']')) {
10953 parser->enclosure_nesting--;
10954 lex_state_set(parser, PM_LEX_STATE_ARG);
10955 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10956 }
10957
10958 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10959 LEX(type);
10960 }
10961
10962 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10963 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10964 }
10965
10966 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10967 pm_do_loop_stack_push(parser, false);
10968 LEX(type);
10969
10970 // ]
10971 case ']':
10972 parser->enclosure_nesting--;
10973 lex_state_set(parser, PM_LEX_STATE_END);
10974 pm_do_loop_stack_pop(parser);
10975 LEX(PM_TOKEN_BRACKET_RIGHT);
10976
10977 // {
10978 case '{': {
10979 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10980
10981 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10982 // This { begins a lambda
10983 parser->command_start = true;
10984 lex_state_set(parser, PM_LEX_STATE_BEG);
10985 type = PM_TOKEN_LAMBDA_BEGIN;
10986 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10987 // This { begins a hash literal
10988 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10989 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10990 // This { begins a block
10991 parser->command_start = true;
10992 lex_state_set(parser, PM_LEX_STATE_BEG);
10993 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10994 // This { begins a block on a command
10995 parser->command_start = true;
10996 lex_state_set(parser, PM_LEX_STATE_BEG);
10997 } else {
10998 // This { begins a hash literal
10999 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11000 }
11001
11002 parser->enclosure_nesting++;
11003 parser->brace_nesting++;
11004 pm_do_loop_stack_push(parser, false);
11005
11006 LEX(type);
11007 }
11008
11009 // }
11010 case '}':
11011 parser->enclosure_nesting--;
11012 pm_do_loop_stack_pop(parser);
11013
11014 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
11015 lex_mode_pop(parser);
11016 LEX(PM_TOKEN_EMBEXPR_END);
11017 }
11018
11019 parser->brace_nesting--;
11020 lex_state_set(parser, PM_LEX_STATE_END);
11021 LEX(PM_TOKEN_BRACE_RIGHT);
11022
11023 // * ** **= *=
11024 case '*': {
11025 if (match(parser, '*')) {
11026 if (match(parser, '=')) {
11027 lex_state_set(parser, PM_LEX_STATE_BEG);
11028 LEX(PM_TOKEN_STAR_STAR_EQUAL);
11029 }
11030
11031 pm_token_type_t type = PM_TOKEN_STAR_STAR;
11032
11033 if (lex_state_spcarg_p(parser, space_seen)) {
11034 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11035 type = PM_TOKEN_USTAR_STAR;
11036 } else if (lex_state_beg_p(parser)) {
11037 type = PM_TOKEN_USTAR_STAR;
11038 } else if (ambiguous_operator_p(parser, space_seen)) {
11039 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11040 }
11041
11042 if (lex_state_operator_p(parser)) {
11043 lex_state_set(parser, PM_LEX_STATE_ARG);
11044 } else {
11045 lex_state_set(parser, PM_LEX_STATE_BEG);
11046 }
11047
11048 LEX(type);
11049 }
11050
11051 if (match(parser, '=')) {
11052 lex_state_set(parser, PM_LEX_STATE_BEG);
11053 LEX(PM_TOKEN_STAR_EQUAL);
11054 }
11055
11056 pm_token_type_t type = PM_TOKEN_STAR;
11057
11058 if (lex_state_spcarg_p(parser, space_seen)) {
11059 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11060 type = PM_TOKEN_USTAR;
11061 } else if (lex_state_beg_p(parser)) {
11062 type = PM_TOKEN_USTAR;
11063 } else if (ambiguous_operator_p(parser, space_seen)) {
11064 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11065 }
11066
11067 if (lex_state_operator_p(parser)) {
11068 lex_state_set(parser, PM_LEX_STATE_ARG);
11069 } else {
11070 lex_state_set(parser, PM_LEX_STATE_BEG);
11071 }
11072
11073 LEX(type);
11074 }
11075
11076 // ! != !~ !@
11077 case '!':
11078 if (lex_state_operator_p(parser)) {
11079 lex_state_set(parser, PM_LEX_STATE_ARG);
11080 if (match(parser, '@')) {
11081 LEX(PM_TOKEN_BANG);
11082 }
11083 } else {
11084 lex_state_set(parser, PM_LEX_STATE_BEG);
11085 }
11086
11087 if (match(parser, '=')) {
11088 LEX(PM_TOKEN_BANG_EQUAL);
11089 }
11090
11091 if (match(parser, '~')) {
11092 LEX(PM_TOKEN_BANG_TILDE);
11093 }
11094
11095 LEX(PM_TOKEN_BANG);
11096
11097 // = => =~ == === =begin
11098 case '=':
11099 if (
11100 current_token_starts_line(parser) &&
11101 (parser->current.end + 5 <= parser->end) &&
11102 memcmp(parser->current.end, "begin", 5) == 0 &&
11103 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11104 ) {
11105 pm_token_type_t type = lex_embdoc(parser);
11106 if (type == PM_TOKEN_EOF) {
11107 LEX(type);
11108 }
11109
11110 goto lex_next_token;
11111 }
11112
11113 if (lex_state_operator_p(parser)) {
11114 lex_state_set(parser, PM_LEX_STATE_ARG);
11115 } else {
11116 lex_state_set(parser, PM_LEX_STATE_BEG);
11117 }
11118
11119 if (match(parser, '>')) {
11120 LEX(PM_TOKEN_EQUAL_GREATER);
11121 }
11122
11123 if (match(parser, '~')) {
11124 LEX(PM_TOKEN_EQUAL_TILDE);
11125 }
11126
11127 if (match(parser, '=')) {
11128 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11129 }
11130
11131 LEX(PM_TOKEN_EQUAL);
11132
11133 // < << <<= <= <=>
11134 case '<':
11135 if (match(parser, '<')) {
11136 if (
11137 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11138 !lex_state_end_p(parser) &&
11139 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11140 ) {
11141 const uint8_t *end = parser->current.end;
11142
11143 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11144 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11145
11146 if (match(parser, '-')) {
11147 indent = PM_HEREDOC_INDENT_DASH;
11148 }
11149 else if (match(parser, '~')) {
11150 indent = PM_HEREDOC_INDENT_TILDE;
11151 }
11152
11153 if (match(parser, '`')) {
11154 quote = PM_HEREDOC_QUOTE_BACKTICK;
11155 }
11156 else if (match(parser, '"')) {
11157 quote = PM_HEREDOC_QUOTE_DOUBLE;
11158 }
11159 else if (match(parser, '\'')) {
11160 quote = PM_HEREDOC_QUOTE_SINGLE;
11161 }
11162
11163 const uint8_t *ident_start = parser->current.end;
11164 size_t width = 0;
11165
11166 if (parser->current.end >= parser->end) {
11167 parser->current.end = end;
11168 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
11169 parser->current.end = end;
11170 } else {
11171 if (quote == PM_HEREDOC_QUOTE_NONE) {
11172 parser->current.end += width;
11173
11174 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
11175 parser->current.end += width;
11176 }
11177 } else {
11178 // If we have quotes, then we're going to go until we find the
11179 // end quote.
11180 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11181 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11182 parser->current.end++;
11183 }
11184 }
11185
11186 size_t ident_length = (size_t) (parser->current.end - ident_start);
11187 bool ident_error = false;
11188
11189 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11190 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11191 ident_error = true;
11192 }
11193
11194 parser->explicit_encoding = NULL;
11195 lex_mode_push(parser, (pm_lex_mode_t) {
11196 .mode = PM_LEX_HEREDOC,
11197 .as.heredoc = {
11198 .base = {
11199 .ident_start = ident_start,
11200 .ident_length = ident_length,
11201 .quote = quote,
11202 .indent = indent
11203 },
11204 .next_start = parser->current.end,
11205 .common_whitespace = NULL,
11206 .line_continuation = false
11207 }
11208 });
11209
11210 if (parser->heredoc_end == NULL) {
11211 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11212
11213 if (body_start == NULL) {
11214 // If there is no newline after the heredoc identifier, then
11215 // this is not a valid heredoc declaration. In this case we
11216 // will add an error, but we will still return a heredoc
11217 // start.
11218 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11219 body_start = parser->end;
11220 } else {
11221 // Otherwise, we want to indicate that the body of the
11222 // heredoc starts on the character after the next newline.
11223 pm_newline_list_append(&parser->newline_list, body_start);
11224 body_start++;
11225 }
11226
11227 parser->next_start = body_start;
11228 } else {
11229 parser->next_start = parser->heredoc_end;
11230 }
11231
11232 LEX(PM_TOKEN_HEREDOC_START);
11233 }
11234 }
11235
11236 if (match(parser, '=')) {
11237 lex_state_set(parser, PM_LEX_STATE_BEG);
11238 LEX(PM_TOKEN_LESS_LESS_EQUAL);
11239 }
11240
11241 if (ambiguous_operator_p(parser, space_seen)) {
11242 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11243 }
11244
11245 if (lex_state_operator_p(parser)) {
11246 lex_state_set(parser, PM_LEX_STATE_ARG);
11247 } else {
11248 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11249 lex_state_set(parser, PM_LEX_STATE_BEG);
11250 }
11251
11252 LEX(PM_TOKEN_LESS_LESS);
11253 }
11254
11255 if (lex_state_operator_p(parser)) {
11256 lex_state_set(parser, PM_LEX_STATE_ARG);
11257 } else {
11258 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11259 lex_state_set(parser, PM_LEX_STATE_BEG);
11260 }
11261
11262 if (match(parser, '=')) {
11263 if (match(parser, '>')) {
11264 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
11265 }
11266
11267 LEX(PM_TOKEN_LESS_EQUAL);
11268 }
11269
11270 LEX(PM_TOKEN_LESS);
11271
11272 // > >> >>= >=
11273 case '>':
11274 if (match(parser, '>')) {
11275 if (lex_state_operator_p(parser)) {
11276 lex_state_set(parser, PM_LEX_STATE_ARG);
11277 } else {
11278 lex_state_set(parser, PM_LEX_STATE_BEG);
11279 }
11280 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11281 }
11282
11283 if (lex_state_operator_p(parser)) {
11284 lex_state_set(parser, PM_LEX_STATE_ARG);
11285 } else {
11286 lex_state_set(parser, PM_LEX_STATE_BEG);
11287 }
11288
11289 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11290
11291 // double-quoted string literal
11292 case '"': {
11293 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11294 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11295 LEX(PM_TOKEN_STRING_BEGIN);
11296 }
11297
11298 // xstring literal
11299 case '`': {
11300 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11301 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11302 LEX(PM_TOKEN_BACKTICK);
11303 }
11304
11305 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11306 if (previous_command_start) {
11307 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11308 } else {
11309 lex_state_set(parser, PM_LEX_STATE_ARG);
11310 }
11311
11312 LEX(PM_TOKEN_BACKTICK);
11313 }
11314
11315 lex_mode_push_string(parser, true, false, '\0', '`');
11316 LEX(PM_TOKEN_BACKTICK);
11317 }
11318
11319 // single-quoted string literal
11320 case '\'': {
11321 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11322 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11323 LEX(PM_TOKEN_STRING_BEGIN);
11324 }
11325
11326 // ? character literal
11327 case '?':
11328 LEX(lex_question_mark(parser));
11329
11330 // & && &&= &=
11331 case '&': {
11332 if (match(parser, '&')) {
11333 lex_state_set(parser, PM_LEX_STATE_BEG);
11334
11335 if (match(parser, '=')) {
11336 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
11337 }
11338
11339 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
11340 }
11341
11342 if (match(parser, '=')) {
11343 lex_state_set(parser, PM_LEX_STATE_BEG);
11344 LEX(PM_TOKEN_AMPERSAND_EQUAL);
11345 }
11346
11347 if (match(parser, '.')) {
11348 lex_state_set(parser, PM_LEX_STATE_DOT);
11349 LEX(PM_TOKEN_AMPERSAND_DOT);
11350 }
11351
11352 pm_token_type_t type = PM_TOKEN_AMPERSAND;
11353 if (lex_state_spcarg_p(parser, space_seen)) {
11354 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11355 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11356 } else {
11357 const uint8_t delim = peek_offset(parser, 1);
11358
11359 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
11360 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11361 }
11362 }
11363
11364 type = PM_TOKEN_UAMPERSAND;
11365 } else if (lex_state_beg_p(parser)) {
11366 type = PM_TOKEN_UAMPERSAND;
11367 } else if (ambiguous_operator_p(parser, space_seen)) {
11368 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11369 }
11370
11371 if (lex_state_operator_p(parser)) {
11372 lex_state_set(parser, PM_LEX_STATE_ARG);
11373 } else {
11374 lex_state_set(parser, PM_LEX_STATE_BEG);
11375 }
11376
11377 LEX(type);
11378 }
11379
11380 // | || ||= |=
11381 case '|':
11382 if (match(parser, '|')) {
11383 if (match(parser, '=')) {
11384 lex_state_set(parser, PM_LEX_STATE_BEG);
11385 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
11386 }
11387
11388 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11389 parser->current.end--;
11390 LEX(PM_TOKEN_PIPE);
11391 }
11392
11393 lex_state_set(parser, PM_LEX_STATE_BEG);
11394 LEX(PM_TOKEN_PIPE_PIPE);
11395 }
11396
11397 if (match(parser, '=')) {
11398 lex_state_set(parser, PM_LEX_STATE_BEG);
11399 LEX(PM_TOKEN_PIPE_EQUAL);
11400 }
11401
11402 if (lex_state_operator_p(parser)) {
11403 lex_state_set(parser, PM_LEX_STATE_ARG);
11404 } else {
11405 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11406 }
11407
11408 LEX(PM_TOKEN_PIPE);
11409
11410 // + += +@
11411 case '+': {
11412 if (lex_state_operator_p(parser)) {
11413 lex_state_set(parser, PM_LEX_STATE_ARG);
11414
11415 if (match(parser, '@')) {
11416 LEX(PM_TOKEN_UPLUS);
11417 }
11418
11419 LEX(PM_TOKEN_PLUS);
11420 }
11421
11422 if (match(parser, '=')) {
11423 lex_state_set(parser, PM_LEX_STATE_BEG);
11424 LEX(PM_TOKEN_PLUS_EQUAL);
11425 }
11426
11427 if (
11428 lex_state_beg_p(parser) ||
11429 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11430 ) {
11431 lex_state_set(parser, PM_LEX_STATE_BEG);
11432
11433 if (pm_char_is_decimal_digit(peek(parser))) {
11434 parser->current.end++;
11435 pm_token_type_t type = lex_numeric(parser);
11436 lex_state_set(parser, PM_LEX_STATE_END);
11437 LEX(type);
11438 }
11439
11440 LEX(PM_TOKEN_UPLUS);
11441 }
11442
11443 if (ambiguous_operator_p(parser, space_seen)) {
11444 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11445 }
11446
11447 lex_state_set(parser, PM_LEX_STATE_BEG);
11448 LEX(PM_TOKEN_PLUS);
11449 }
11450
11451 // - -= -@
11452 case '-': {
11453 if (lex_state_operator_p(parser)) {
11454 lex_state_set(parser, PM_LEX_STATE_ARG);
11455
11456 if (match(parser, '@')) {
11457 LEX(PM_TOKEN_UMINUS);
11458 }
11459
11460 LEX(PM_TOKEN_MINUS);
11461 }
11462
11463 if (match(parser, '=')) {
11464 lex_state_set(parser, PM_LEX_STATE_BEG);
11465 LEX(PM_TOKEN_MINUS_EQUAL);
11466 }
11467
11468 if (match(parser, '>')) {
11469 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11470 LEX(PM_TOKEN_MINUS_GREATER);
11471 }
11472
11473 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11474 bool is_beg = lex_state_beg_p(parser);
11475 if (!is_beg && spcarg) {
11476 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11477 }
11478
11479 if (is_beg || spcarg) {
11480 lex_state_set(parser, PM_LEX_STATE_BEG);
11481 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11482 }
11483
11484 if (ambiguous_operator_p(parser, space_seen)) {
11485 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11486 }
11487
11488 lex_state_set(parser, PM_LEX_STATE_BEG);
11489 LEX(PM_TOKEN_MINUS);
11490 }
11491
11492 // . .. ...
11493 case '.': {
11494 bool beg_p = lex_state_beg_p(parser);
11495
11496 if (match(parser, '.')) {
11497 if (match(parser, '.')) {
11498 // If we're _not_ inside a range within default parameters
11499 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11500 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11501 lex_state_set(parser, PM_LEX_STATE_BEG);
11502 } else {
11503 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11504 }
11505 LEX(PM_TOKEN_UDOT_DOT_DOT);
11506 }
11507
11508 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11509 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11510 }
11511
11512 lex_state_set(parser, PM_LEX_STATE_BEG);
11513 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
11514 }
11515
11516 lex_state_set(parser, PM_LEX_STATE_BEG);
11517 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11518 }
11519
11520 lex_state_set(parser, PM_LEX_STATE_DOT);
11521 LEX(PM_TOKEN_DOT);
11522 }
11523
11524 // integer
11525 case '0':
11526 case '1':
11527 case '2':
11528 case '3':
11529 case '4':
11530 case '5':
11531 case '6':
11532 case '7':
11533 case '8':
11534 case '9': {
11535 pm_token_type_t type = lex_numeric(parser);
11536 lex_state_set(parser, PM_LEX_STATE_END);
11537 LEX(type);
11538 }
11539
11540 // :: symbol
11541 case ':':
11542 if (match(parser, ':')) {
11543 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11544 lex_state_set(parser, PM_LEX_STATE_BEG);
11545 LEX(PM_TOKEN_UCOLON_COLON);
11546 }
11547
11548 lex_state_set(parser, PM_LEX_STATE_DOT);
11549 LEX(PM_TOKEN_COLON_COLON);
11550 }
11551
11552 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11553 lex_state_set(parser, PM_LEX_STATE_BEG);
11554 LEX(PM_TOKEN_COLON);
11555 }
11556
11557 if (peek(parser) == '"' || peek(parser) == '\'') {
11558 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11559 parser->current.end++;
11560 }
11561
11562 lex_state_set(parser, PM_LEX_STATE_FNAME);
11563 LEX(PM_TOKEN_SYMBOL_BEGIN);
11564
11565 // / /=
11566 case '/':
11567 if (lex_state_beg_p(parser)) {
11568 lex_mode_push_regexp(parser, '\0', '/');
11569 LEX(PM_TOKEN_REGEXP_BEGIN);
11570 }
11571
11572 if (match(parser, '=')) {
11573 lex_state_set(parser, PM_LEX_STATE_BEG);
11574 LEX(PM_TOKEN_SLASH_EQUAL);
11575 }
11576
11577 if (lex_state_spcarg_p(parser, space_seen)) {
11578 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11579 lex_mode_push_regexp(parser, '\0', '/');
11580 LEX(PM_TOKEN_REGEXP_BEGIN);
11581 }
11582
11583 if (ambiguous_operator_p(parser, space_seen)) {
11584 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11585 }
11586
11587 if (lex_state_operator_p(parser)) {
11588 lex_state_set(parser, PM_LEX_STATE_ARG);
11589 } else {
11590 lex_state_set(parser, PM_LEX_STATE_BEG);
11591 }
11592
11593 LEX(PM_TOKEN_SLASH);
11594
11595 // ^ ^=
11596 case '^':
11597 if (lex_state_operator_p(parser)) {
11598 lex_state_set(parser, PM_LEX_STATE_ARG);
11599 } else {
11600 lex_state_set(parser, PM_LEX_STATE_BEG);
11601 }
11602 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11603
11604 // ~ ~@
11605 case '~':
11606 if (lex_state_operator_p(parser)) {
11607 (void) match(parser, '@');
11608 lex_state_set(parser, PM_LEX_STATE_ARG);
11609 } else {
11610 lex_state_set(parser, PM_LEX_STATE_BEG);
11611 }
11612
11613 LEX(PM_TOKEN_TILDE);
11614
11615 // % %= %i %I %q %Q %w %W
11616 case '%': {
11617 // If there is no subsequent character then we have an
11618 // invalid token. We're going to say it's the percent
11619 // operator because we don't want to move into the string
11620 // lex mode unnecessarily.
11621 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11622 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11623 LEX(PM_TOKEN_PERCENT);
11624 }
11625
11626 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11627 lex_state_set(parser, PM_LEX_STATE_BEG);
11628 LEX(PM_TOKEN_PERCENT_EQUAL);
11629 } else if (
11630 lex_state_beg_p(parser) ||
11631 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11632 lex_state_spcarg_p(parser, space_seen)
11633 ) {
11634 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11635 if (*parser->current.end >= 0x80) {
11636 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11637 }
11638
11639 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11640 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11641 LEX(PM_TOKEN_STRING_BEGIN);
11642 }
11643
11644 // Delimiters for %-literals cannot be alphanumeric. We
11645 // validate that here.
11646 uint8_t delimiter = peek_offset(parser, 1);
11647 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11648 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11649 goto lex_next_token;
11650 }
11651
11652 switch (peek(parser)) {
11653 case 'i': {
11654 parser->current.end++;
11655
11656 if (parser->current.end < parser->end) {
11657 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11658 } else {
11659 lex_mode_push_list_eof(parser);
11660 }
11661
11662 LEX(PM_TOKEN_PERCENT_LOWER_I);
11663 }
11664 case 'I': {
11665 parser->current.end++;
11666
11667 if (parser->current.end < parser->end) {
11668 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11669 } else {
11670 lex_mode_push_list_eof(parser);
11671 }
11672
11673 LEX(PM_TOKEN_PERCENT_UPPER_I);
11674 }
11675 case 'r': {
11676 parser->current.end++;
11677
11678 if (parser->current.end < parser->end) {
11679 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11680 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11681 } else {
11682 lex_mode_push_regexp(parser, '\0', '\0');
11683 }
11684
11685 LEX(PM_TOKEN_REGEXP_BEGIN);
11686 }
11687 case 'q': {
11688 parser->current.end++;
11689
11690 if (parser->current.end < parser->end) {
11691 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11692 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11693 } else {
11694 lex_mode_push_string_eof(parser);
11695 }
11696
11697 LEX(PM_TOKEN_STRING_BEGIN);
11698 }
11699 case 'Q': {
11700 parser->current.end++;
11701
11702 if (parser->current.end < parser->end) {
11703 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11704 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11705 } else {
11706 lex_mode_push_string_eof(parser);
11707 }
11708
11709 LEX(PM_TOKEN_STRING_BEGIN);
11710 }
11711 case 's': {
11712 parser->current.end++;
11713
11714 if (parser->current.end < parser->end) {
11715 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11716 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11717 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11718 } else {
11719 lex_mode_push_string_eof(parser);
11720 }
11721
11722 LEX(PM_TOKEN_SYMBOL_BEGIN);
11723 }
11724 case 'w': {
11725 parser->current.end++;
11726
11727 if (parser->current.end < parser->end) {
11728 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11729 } else {
11730 lex_mode_push_list_eof(parser);
11731 }
11732
11733 LEX(PM_TOKEN_PERCENT_LOWER_W);
11734 }
11735 case 'W': {
11736 parser->current.end++;
11737
11738 if (parser->current.end < parser->end) {
11739 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11740 } else {
11741 lex_mode_push_list_eof(parser);
11742 }
11743
11744 LEX(PM_TOKEN_PERCENT_UPPER_W);
11745 }
11746 case 'x': {
11747 parser->current.end++;
11748
11749 if (parser->current.end < parser->end) {
11750 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11751 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11752 } else {
11753 lex_mode_push_string_eof(parser);
11754 }
11755
11756 LEX(PM_TOKEN_PERCENT_LOWER_X);
11757 }
11758 default:
11759 // If we get to this point, then we have a % that is completely
11760 // unparsable. In this case we'll just drop it from the parser
11761 // and skip past it and hope that the next token is something
11762 // that we can parse.
11763 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11764 goto lex_next_token;
11765 }
11766 }
11767
11768 if (ambiguous_operator_p(parser, space_seen)) {
11769 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11770 }
11771
11772 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11773 LEX(PM_TOKEN_PERCENT);
11774 }
11775
11776 // global variable
11777 case '$': {
11778 pm_token_type_t type = lex_global_variable(parser);
11779
11780 // If we're lexing an embedded variable, then we need to pop back into
11781 // the parent lex context.
11782 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11783 lex_mode_pop(parser);
11784 }
11785
11786 lex_state_set(parser, PM_LEX_STATE_END);
11787 LEX(type);
11788 }
11789
11790 // instance variable, class variable
11791 case '@':
11792 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11793 LEX(lex_at_variable(parser));
11794
11795 default: {
11796 if (*parser->current.start != '_') {
11797 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11798
11799 // If this isn't the beginning of an identifier, then
11800 // it's an invalid token as we've exhausted all of the
11801 // other options. We'll skip past it and return the next
11802 // token after adding an appropriate error message.
11803 if (!width) {
11804 if (*parser->current.start >= 0x80) {
11805 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11806 } else if (*parser->current.start == '\\') {
11807 switch (peek_at(parser, parser->current.start + 1)) {
11808 case ' ':
11809 parser->current.end++;
11810 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11811 break;
11812 case '\f':
11813 parser->current.end++;
11814 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11815 break;
11816 case '\t':
11817 parser->current.end++;
11818 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11819 break;
11820 case '\v':
11821 parser->current.end++;
11822 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11823 break;
11824 case '\r':
11825 if (peek_at(parser, parser->current.start + 2) != '\n') {
11826 parser->current.end++;
11827 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11828 break;
11829 }
11831 default:
11832 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11833 break;
11834 }
11835 } else if (char_is_ascii_printable(*parser->current.start)) {
11836 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11837 } else {
11838 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11839 }
11840
11841 goto lex_next_token;
11842 }
11843
11844 parser->current.end = parser->current.start + width;
11845 }
11846
11847 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11848
11849 // If we've hit a __END__ and it was at the start of the
11850 // line or the start of the file and it is followed by
11851 // either a \n or a \r\n, then this is the last token of the
11852 // file.
11853 if (
11854 ((parser->current.end - parser->current.start) == 7) &&
11855 current_token_starts_line(parser) &&
11856 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11857 (parser->current.end == parser->end || match_eol(parser))
11858 ) {
11859 // Since we know we're about to add an __END__ comment,
11860 // we know we need to add all of the newlines to get the
11861 // correct column information for it.
11862 const uint8_t *cursor = parser->current.end;
11863 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11864 pm_newline_list_append(&parser->newline_list, cursor++);
11865 }
11866
11867 parser->current.end = parser->end;
11868 parser->current.type = PM_TOKEN___END__;
11869 parser_lex_callback(parser);
11870
11871 parser->data_loc.start = parser->current.start;
11872 parser->data_loc.end = parser->current.end;
11873
11874 LEX(PM_TOKEN_EOF);
11875 }
11876
11877 pm_lex_state_t last_state = parser->lex_state;
11878
11879 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11880 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11881 if (previous_command_start) {
11882 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11883 } else {
11884 lex_state_set(parser, PM_LEX_STATE_ARG);
11885 }
11886 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11887 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11888 } else {
11889 lex_state_set(parser, PM_LEX_STATE_END);
11890 }
11891 }
11892
11893 if (
11894 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11895 (type == PM_TOKEN_IDENTIFIER) &&
11896 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11897 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11898 ) {
11899 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11900 }
11901
11902 LEX(type);
11903 }
11904 }
11905 }
11906 case PM_LEX_LIST: {
11907 if (parser->next_start != NULL) {
11908 parser->current.end = parser->next_start;
11909 parser->next_start = NULL;
11910 }
11911
11912 // First we'll set the beginning of the token.
11913 parser->current.start = parser->current.end;
11914
11915 // If there's any whitespace at the start of the list, then we're
11916 // going to trim it off the beginning and create a new token.
11917 size_t whitespace;
11918
11919 if (parser->heredoc_end) {
11920 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11921 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11922 whitespace += 1;
11923 }
11924 } else {
11925 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11926 }
11927
11928 if (whitespace > 0) {
11929 parser->current.end += whitespace;
11930 if (peek_offset(parser, -1) == '\n') {
11931 // mutates next_start
11932 parser_flush_heredoc_end(parser);
11933 }
11934 LEX(PM_TOKEN_WORDS_SEP);
11935 }
11936
11937 // We'll check if we're at the end of the file. If we are, then we
11938 // need to return the EOF token.
11939 if (parser->current.end >= parser->end) {
11940 LEX(PM_TOKEN_EOF);
11941 }
11942
11943 // Here we'll get a list of the places where strpbrk should break,
11944 // and then find the first one.
11945 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11946 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11947 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11948
11949 // If we haven't found an escape yet, then this buffer will be
11950 // unallocated since we can refer directly to the source string.
11951 pm_token_buffer_t token_buffer = { 0 };
11952
11953 while (breakpoint != NULL) {
11954 // If we hit whitespace, then we must have received content by
11955 // now, so we can return an element of the list.
11956 if (pm_char_is_whitespace(*breakpoint)) {
11957 parser->current.end = breakpoint;
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 }
11961
11962 // If we hit the terminator, we need to check which token to
11963 // return.
11964 if (*breakpoint == lex_mode->as.list.terminator) {
11965 // If this terminator doesn't actually close the list, then
11966 // we need to continue on past it.
11967 if (lex_mode->as.list.nesting > 0) {
11968 parser->current.end = breakpoint + 1;
11969 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11970 lex_mode->as.list.nesting--;
11971 continue;
11972 }
11973
11974 // If we've hit the terminator and we've already skipped
11975 // past content, then we can return a list node.
11976 if (breakpoint > parser->current.start) {
11977 parser->current.end = breakpoint;
11978 pm_token_buffer_flush(parser, &token_buffer);
11979 LEX(PM_TOKEN_STRING_CONTENT);
11980 }
11981
11982 // Otherwise, switch back to the default state and return
11983 // the end of the list.
11984 parser->current.end = breakpoint + 1;
11985 lex_mode_pop(parser);
11986 lex_state_set(parser, PM_LEX_STATE_END);
11987 LEX(PM_TOKEN_STRING_END);
11988 }
11989
11990 // If we hit a null byte, skip directly past it.
11991 if (*breakpoint == '\0') {
11992 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11993 continue;
11994 }
11995
11996 // If we hit escapes, then we need to treat the next token
11997 // literally. In this case we'll skip past the next character
11998 // and find the next breakpoint.
11999 if (*breakpoint == '\\') {
12000 parser->current.end = breakpoint + 1;
12001
12002 // If we've hit the end of the file, then break out of the
12003 // loop by setting the breakpoint to NULL.
12004 if (parser->current.end == parser->end) {
12005 breakpoint = NULL;
12006 continue;
12007 }
12008
12009 pm_token_buffer_escape(parser, &token_buffer);
12010 uint8_t peeked = peek(parser);
12011
12012 switch (peeked) {
12013 case ' ':
12014 case '\f':
12015 case '\t':
12016 case '\v':
12017 case '\\':
12018 pm_token_buffer_push_byte(&token_buffer, peeked);
12019 parser->current.end++;
12020 break;
12021 case '\r':
12022 parser->current.end++;
12023 if (peek(parser) != '\n') {
12024 pm_token_buffer_push_byte(&token_buffer, '\r');
12025 break;
12026 }
12028 case '\n':
12029 pm_token_buffer_push_byte(&token_buffer, '\n');
12030
12031 if (parser->heredoc_end) {
12032 // ... if we are on the same line as a heredoc,
12033 // flush the heredoc and continue parsing after
12034 // heredoc_end.
12035 parser_flush_heredoc_end(parser);
12036 pm_token_buffer_copy(parser, &token_buffer);
12037 LEX(PM_TOKEN_STRING_CONTENT);
12038 } else {
12039 // ... else track the newline.
12040 pm_newline_list_append(&parser->newline_list, parser->current.end);
12041 }
12042
12043 parser->current.end++;
12044 break;
12045 default:
12046 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12047 pm_token_buffer_push_byte(&token_buffer, peeked);
12048 parser->current.end++;
12049 } else if (lex_mode->as.list.interpolation) {
12050 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12051 } else {
12052 pm_token_buffer_push_byte(&token_buffer, '\\');
12053 pm_token_buffer_push_escaped(&token_buffer, parser);
12054 }
12055
12056 break;
12057 }
12058
12059 token_buffer.cursor = parser->current.end;
12060 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12061 continue;
12062 }
12063
12064 // If we hit a #, then we will attempt to lex interpolation.
12065 if (*breakpoint == '#') {
12066 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12067
12068 if (type == PM_TOKEN_NOT_PROVIDED) {
12069 // If we haven't returned at this point then we had something
12070 // that looked like an interpolated class or instance variable
12071 // like "#@" but wasn't actually. In this case we'll just skip
12072 // to the next breakpoint.
12073 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12074 continue;
12075 }
12076
12077 if (type == PM_TOKEN_STRING_CONTENT) {
12078 pm_token_buffer_flush(parser, &token_buffer);
12079 }
12080
12081 LEX(type);
12082 }
12083
12084 // If we've hit the incrementor, then we need to skip past it
12085 // and find the next breakpoint.
12086 assert(*breakpoint == lex_mode->as.list.incrementor);
12087 parser->current.end = breakpoint + 1;
12088 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12089 lex_mode->as.list.nesting++;
12090 continue;
12091 }
12092
12093 if (parser->current.end > parser->current.start) {
12094 pm_token_buffer_flush(parser, &token_buffer);
12095 LEX(PM_TOKEN_STRING_CONTENT);
12096 }
12097
12098 // If we were unable to find a breakpoint, then this token hits the
12099 // end of the file.
12100 parser->current.end = parser->end;
12101 pm_token_buffer_flush(parser, &token_buffer);
12102 LEX(PM_TOKEN_STRING_CONTENT);
12103 }
12104 case PM_LEX_REGEXP: {
12105 // First, we'll set the start of this token to be the current end.
12106 if (parser->next_start == NULL) {
12107 parser->current.start = parser->current.end;
12108 } else {
12109 parser->current.start = parser->next_start;
12110 parser->current.end = parser->next_start;
12111 parser->next_start = NULL;
12112 }
12113
12114 // We'll check if we're at the end of the file. If we are, then we
12115 // need to return the EOF token.
12116 if (parser->current.end >= parser->end) {
12117 LEX(PM_TOKEN_EOF);
12118 }
12119
12120 // Get a reference to the current mode.
12121 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12122
12123 // These are the places where we need to split up the content of the
12124 // regular expression. We'll use strpbrk to find the first of these
12125 // characters.
12126 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12127 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12128 pm_regexp_token_buffer_t token_buffer = { 0 };
12129
12130 while (breakpoint != NULL) {
12131 uint8_t term = lex_mode->as.regexp.terminator;
12132 bool is_terminator = (*breakpoint == term);
12133
12134 // If the terminator is newline, we need to consider \r\n _also_ a newline
12135 // For example: `%\nfoo\r\n`
12136 // The string should be "foo", not "foo\r"
12137 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12138 if (term == '\n') {
12139 is_terminator = true;
12140 }
12141
12142 // If the terminator is a CR, but we see a CRLF, we need to
12143 // treat the CRLF as a newline, meaning this is _not_ the
12144 // terminator
12145 if (term == '\r') {
12146 is_terminator = false;
12147 }
12148 }
12149
12150 // If we hit the terminator, we need to determine what kind of
12151 // token to return.
12152 if (is_terminator) {
12153 if (lex_mode->as.regexp.nesting > 0) {
12154 parser->current.end = breakpoint + 1;
12155 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12156 lex_mode->as.regexp.nesting--;
12157 continue;
12158 }
12159
12160 // Here we've hit the terminator. If we have already consumed
12161 // content then we need to return that content as string content
12162 // first.
12163 if (breakpoint > parser->current.start) {
12164 parser->current.end = breakpoint;
12165 pm_regexp_token_buffer_flush(parser, &token_buffer);
12166 LEX(PM_TOKEN_STRING_CONTENT);
12167 }
12168
12169 // Check here if we need to track the newline.
12170 size_t eol_length = match_eol_at(parser, breakpoint);
12171 if (eol_length) {
12172 parser->current.end = breakpoint + eol_length;
12173 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12174 } else {
12175 parser->current.end = breakpoint + 1;
12176 }
12177
12178 // Since we've hit the terminator of the regular expression,
12179 // we now need to parse the options.
12180 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12181
12182 lex_mode_pop(parser);
12183 lex_state_set(parser, PM_LEX_STATE_END);
12184 LEX(PM_TOKEN_REGEXP_END);
12185 }
12186
12187 // If we've hit the incrementor, then we need to skip past it
12188 // and find the next breakpoint.
12189 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12190 parser->current.end = breakpoint + 1;
12191 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12192 lex_mode->as.regexp.nesting++;
12193 continue;
12194 }
12195
12196 switch (*breakpoint) {
12197 case '\0':
12198 // If we hit a null byte, skip directly past it.
12199 parser->current.end = breakpoint + 1;
12200 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12201 break;
12202 case '\r':
12203 if (peek_at(parser, breakpoint + 1) != '\n') {
12204 parser->current.end = breakpoint + 1;
12205 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12206 break;
12207 }
12208
12209 breakpoint++;
12210 parser->current.end = breakpoint;
12211 pm_regexp_token_buffer_escape(parser, &token_buffer);
12212 token_buffer.base.cursor = breakpoint;
12213
12215 case '\n':
12216 // If we've hit a newline, then we need to track that in
12217 // the list of newlines.
12218 if (parser->heredoc_end == NULL) {
12219 pm_newline_list_append(&parser->newline_list, breakpoint);
12220 parser->current.end = breakpoint + 1;
12221 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12222 break;
12223 }
12224
12225 parser->current.end = breakpoint + 1;
12226 parser_flush_heredoc_end(parser);
12227 pm_regexp_token_buffer_flush(parser, &token_buffer);
12228 LEX(PM_TOKEN_STRING_CONTENT);
12229 case '\\': {
12230 // If we hit escapes, then we need to treat the next
12231 // token literally. In this case we'll skip past the
12232 // next character and find the next breakpoint.
12233 parser->current.end = breakpoint + 1;
12234
12235 // If we've hit the end of the file, then break out of
12236 // the loop by setting the breakpoint to NULL.
12237 if (parser->current.end == parser->end) {
12238 breakpoint = NULL;
12239 break;
12240 }
12241
12242 pm_regexp_token_buffer_escape(parser, &token_buffer);
12243 uint8_t peeked = peek(parser);
12244
12245 switch (peeked) {
12246 case '\r':
12247 parser->current.end++;
12248 if (peek(parser) != '\n') {
12249 if (lex_mode->as.regexp.terminator != '\r') {
12250 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12251 }
12252 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12253 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12254 break;
12255 }
12257 case '\n':
12258 if (parser->heredoc_end) {
12259 // ... if we are on the same line as a heredoc,
12260 // flush the heredoc and continue parsing after
12261 // heredoc_end.
12262 parser_flush_heredoc_end(parser);
12263 pm_regexp_token_buffer_copy(parser, &token_buffer);
12264 LEX(PM_TOKEN_STRING_CONTENT);
12265 } else {
12266 // ... else track the newline.
12267 pm_newline_list_append(&parser->newline_list, parser->current.end);
12268 }
12269
12270 parser->current.end++;
12271 break;
12272 case 'c':
12273 case 'C':
12274 case 'M':
12275 case 'u':
12276 case 'x':
12277 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12278 break;
12279 default:
12280 if (lex_mode->as.regexp.terminator == peeked) {
12281 // Some characters when they are used as the
12282 // terminator also receive an escape. They are
12283 // enumerated here.
12284 switch (peeked) {
12285 case '$': case ')': case '*': case '+':
12286 case '.': case '>': case '?': case ']':
12287 case '^': case '|': case '}':
12288 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12289 break;
12290 default:
12291 break;
12292 }
12293
12294 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12295 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12296 parser->current.end++;
12297 break;
12298 }
12299
12300 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12301 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12302 break;
12303 }
12304
12305 token_buffer.base.cursor = parser->current.end;
12306 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12307 break;
12308 }
12309 case '#': {
12310 // If we hit a #, then we will attempt to lex
12311 // interpolation.
12312 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12313
12314 if (type == PM_TOKEN_NOT_PROVIDED) {
12315 // If we haven't returned at this point then we had
12316 // something that looked like an interpolated class or
12317 // instance variable like "#@" but wasn't actually. In
12318 // this case we'll just skip to the next breakpoint.
12319 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12320 break;
12321 }
12322
12323 if (type == PM_TOKEN_STRING_CONTENT) {
12324 pm_regexp_token_buffer_flush(parser, &token_buffer);
12325 }
12326
12327 LEX(type);
12328 }
12329 default:
12330 assert(false && "unreachable");
12331 break;
12332 }
12333 }
12334
12335 if (parser->current.end > parser->current.start) {
12336 pm_regexp_token_buffer_flush(parser, &token_buffer);
12337 LEX(PM_TOKEN_STRING_CONTENT);
12338 }
12339
12340 // If we were unable to find a breakpoint, then this token hits the
12341 // end of the file.
12342 parser->current.end = parser->end;
12343 pm_regexp_token_buffer_flush(parser, &token_buffer);
12344 LEX(PM_TOKEN_STRING_CONTENT);
12345 }
12346 case PM_LEX_STRING: {
12347 // First, we'll set to start of this token to be the current end.
12348 if (parser->next_start == NULL) {
12349 parser->current.start = parser->current.end;
12350 } else {
12351 parser->current.start = parser->next_start;
12352 parser->current.end = parser->next_start;
12353 parser->next_start = NULL;
12354 }
12355
12356 // We'll check if we're at the end of the file. If we are, then we need to
12357 // return the EOF token.
12358 if (parser->current.end >= parser->end) {
12359 LEX(PM_TOKEN_EOF);
12360 }
12361
12362 // These are the places where we need to split up the content of the
12363 // string. We'll use strpbrk to find the first of these characters.
12364 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12365 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12366 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12367
12368 // If we haven't found an escape yet, then this buffer will be
12369 // unallocated since we can refer directly to the source string.
12370 pm_token_buffer_t token_buffer = { 0 };
12371
12372 while (breakpoint != NULL) {
12373 // If we hit the incrementor, then we'll increment then nesting and
12374 // continue lexing.
12375 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12376 lex_mode->as.string.nesting++;
12377 parser->current.end = breakpoint + 1;
12378 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12379 continue;
12380 }
12381
12382 uint8_t term = lex_mode->as.string.terminator;
12383 bool is_terminator = (*breakpoint == term);
12384
12385 // If the terminator is newline, we need to consider \r\n _also_ a newline
12386 // For example: `%r\nfoo\r\n`
12387 // The string should be /foo/, not /foo\r/
12388 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12389 if (term == '\n') {
12390 is_terminator = true;
12391 }
12392
12393 // If the terminator is a CR, but we see a CRLF, we need to
12394 // treat the CRLF as a newline, meaning this is _not_ the
12395 // terminator
12396 if (term == '\r') {
12397 is_terminator = false;
12398 }
12399 }
12400
12401 // Note that we have to check the terminator here first because we could
12402 // potentially be parsing a % string that has a # character as the
12403 // terminator.
12404 if (is_terminator) {
12405 // If this terminator doesn't actually close the string, then we need
12406 // to continue on past it.
12407 if (lex_mode->as.string.nesting > 0) {
12408 parser->current.end = breakpoint + 1;
12409 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12410 lex_mode->as.string.nesting--;
12411 continue;
12412 }
12413
12414 // Here we've hit the terminator. If we have already consumed content
12415 // then we need to return that content as string content first.
12416 if (breakpoint > parser->current.start) {
12417 parser->current.end = breakpoint;
12418 pm_token_buffer_flush(parser, &token_buffer);
12419 LEX(PM_TOKEN_STRING_CONTENT);
12420 }
12421
12422 // Otherwise we need to switch back to the parent lex mode and
12423 // return the end of the string.
12424 size_t eol_length = match_eol_at(parser, breakpoint);
12425 if (eol_length) {
12426 parser->current.end = breakpoint + eol_length;
12427 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12428 } else {
12429 parser->current.end = breakpoint + 1;
12430 }
12431
12432 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12433 parser->current.end++;
12434 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12435 lex_mode_pop(parser);
12436 LEX(PM_TOKEN_LABEL_END);
12437 }
12438
12439 lex_state_set(parser, PM_LEX_STATE_END);
12440 lex_mode_pop(parser);
12441 LEX(PM_TOKEN_STRING_END);
12442 }
12443
12444 switch (*breakpoint) {
12445 case '\0':
12446 // Skip directly past the null character.
12447 parser->current.end = breakpoint + 1;
12448 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12449 break;
12450 case '\r':
12451 if (peek_at(parser, breakpoint + 1) != '\n') {
12452 parser->current.end = breakpoint + 1;
12453 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12454 break;
12455 }
12456
12457 // If we hit a \r\n sequence, then we need to treat it
12458 // as a newline.
12459 breakpoint++;
12460 parser->current.end = breakpoint;
12461 pm_token_buffer_escape(parser, &token_buffer);
12462 token_buffer.cursor = breakpoint;
12463
12465 case '\n':
12466 // When we hit a newline, we need to flush any potential
12467 // heredocs. Note that this has to happen after we check
12468 // for the terminator in case the terminator is a
12469 // newline character.
12470 if (parser->heredoc_end == NULL) {
12471 pm_newline_list_append(&parser->newline_list, breakpoint);
12472 parser->current.end = breakpoint + 1;
12473 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12474 break;
12475 }
12476
12477 parser->current.end = breakpoint + 1;
12478 parser_flush_heredoc_end(parser);
12479 pm_token_buffer_flush(parser, &token_buffer);
12480 LEX(PM_TOKEN_STRING_CONTENT);
12481 case '\\': {
12482 // Here we hit escapes.
12483 parser->current.end = breakpoint + 1;
12484
12485 // If we've hit the end of the file, then break out of
12486 // the loop by setting the breakpoint to NULL.
12487 if (parser->current.end == parser->end) {
12488 breakpoint = NULL;
12489 continue;
12490 }
12491
12492 pm_token_buffer_escape(parser, &token_buffer);
12493 uint8_t peeked = peek(parser);
12494
12495 switch (peeked) {
12496 case '\\':
12497 pm_token_buffer_push_byte(&token_buffer, '\\');
12498 parser->current.end++;
12499 break;
12500 case '\r':
12501 parser->current.end++;
12502 if (peek(parser) != '\n') {
12503 if (!lex_mode->as.string.interpolation) {
12504 pm_token_buffer_push_byte(&token_buffer, '\\');
12505 }
12506 pm_token_buffer_push_byte(&token_buffer, '\r');
12507 break;
12508 }
12510 case '\n':
12511 if (!lex_mode->as.string.interpolation) {
12512 pm_token_buffer_push_byte(&token_buffer, '\\');
12513 pm_token_buffer_push_byte(&token_buffer, '\n');
12514 }
12515
12516 if (parser->heredoc_end) {
12517 // ... if we are on the same line as a heredoc,
12518 // flush the heredoc and continue parsing after
12519 // heredoc_end.
12520 parser_flush_heredoc_end(parser);
12521 pm_token_buffer_copy(parser, &token_buffer);
12522 LEX(PM_TOKEN_STRING_CONTENT);
12523 } else {
12524 // ... else track the newline.
12525 pm_newline_list_append(&parser->newline_list, parser->current.end);
12526 }
12527
12528 parser->current.end++;
12529 break;
12530 default:
12531 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12532 pm_token_buffer_push_byte(&token_buffer, peeked);
12533 parser->current.end++;
12534 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12535 pm_token_buffer_push_byte(&token_buffer, peeked);
12536 parser->current.end++;
12537 } else if (lex_mode->as.string.interpolation) {
12538 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12539 } else {
12540 pm_token_buffer_push_byte(&token_buffer, '\\');
12541 pm_token_buffer_push_escaped(&token_buffer, parser);
12542 }
12543
12544 break;
12545 }
12546
12547 token_buffer.cursor = parser->current.end;
12548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12549 break;
12550 }
12551 case '#': {
12552 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12553
12554 if (type == PM_TOKEN_NOT_PROVIDED) {
12555 // If we haven't returned at this point then we had something that
12556 // looked like an interpolated class or instance variable like "#@"
12557 // but wasn't actually. In this case we'll just skip to the next
12558 // breakpoint.
12559 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12560 break;
12561 }
12562
12563 if (type == PM_TOKEN_STRING_CONTENT) {
12564 pm_token_buffer_flush(parser, &token_buffer);
12565 }
12566
12567 LEX(type);
12568 }
12569 default:
12570 assert(false && "unreachable");
12571 }
12572 }
12573
12574 if (parser->current.end > parser->current.start) {
12575 pm_token_buffer_flush(parser, &token_buffer);
12576 LEX(PM_TOKEN_STRING_CONTENT);
12577 }
12578
12579 // If we've hit the end of the string, then this is an unterminated
12580 // string. In that case we'll return a string content token.
12581 parser->current.end = parser->end;
12582 pm_token_buffer_flush(parser, &token_buffer);
12583 LEX(PM_TOKEN_STRING_CONTENT);
12584 }
12585 case PM_LEX_HEREDOC: {
12586 // First, we'll set to start of this token.
12587 if (parser->next_start == NULL) {
12588 parser->current.start = parser->current.end;
12589 } else {
12590 parser->current.start = parser->next_start;
12591 parser->current.end = parser->next_start;
12592 parser->heredoc_end = NULL;
12593 parser->next_start = NULL;
12594 }
12595
12596 // Now let's grab the information about the identifier off of the
12597 // current lex mode.
12598 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12599 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12600
12601 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12602 lex_mode->as.heredoc.line_continuation = false;
12603
12604 // We'll check if we're at the end of the file. If we are, then we
12605 // will add an error (because we weren't able to find the
12606 // terminator) but still continue parsing so that content after the
12607 // declaration of the heredoc can be parsed.
12608 if (parser->current.end >= parser->end) {
12609 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12610 parser->next_start = lex_mode->as.heredoc.next_start;
12611 parser->heredoc_end = parser->current.end;
12612 lex_state_set(parser, PM_LEX_STATE_END);
12613 lex_mode_pop(parser);
12614 LEX(PM_TOKEN_HEREDOC_END);
12615 }
12616
12617 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12618 size_t ident_length = heredoc_lex_mode->ident_length;
12619
12620 // If we are immediately following a newline and we have hit the
12621 // terminator, then we need to return the ending of the heredoc.
12622 if (current_token_starts_line(parser)) {
12623 const uint8_t *start = parser->current.start;
12624
12625 if (!line_continuation && (start + ident_length <= parser->end)) {
12626 const uint8_t *newline = next_newline(start, parser->end - start);
12627 const uint8_t *ident_end = newline;
12628 const uint8_t *terminator_end = newline;
12629
12630 if (newline == NULL) {
12631 terminator_end = parser->end;
12632 ident_end = parser->end;
12633 } else {
12634 terminator_end++;
12635 if (newline[-1] == '\r') {
12636 ident_end--; // Remove \r
12637 }
12638 }
12639
12640 const uint8_t *terminator_start = ident_end - ident_length;
12641 const uint8_t *cursor = start;
12642
12643 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12644 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12645 cursor++;
12646 }
12647 }
12648
12649 if (
12650 (cursor == terminator_start) &&
12651 (memcmp(terminator_start, ident_start, ident_length) == 0)
12652 ) {
12653 if (newline != NULL) {
12654 pm_newline_list_append(&parser->newline_list, newline);
12655 }
12656
12657 parser->current.end = terminator_end;
12658 if (*lex_mode->as.heredoc.next_start == '\\') {
12659 parser->next_start = NULL;
12660 } else {
12661 parser->next_start = lex_mode->as.heredoc.next_start;
12662 parser->heredoc_end = parser->current.end;
12663 }
12664
12665 lex_state_set(parser, PM_LEX_STATE_END);
12666 lex_mode_pop(parser);
12667 LEX(PM_TOKEN_HEREDOC_END);
12668 }
12669 }
12670
12671 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12672 if (
12673 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12674 lex_mode->as.heredoc.common_whitespace != NULL &&
12675 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12676 peek_at(parser, start) != '\n'
12677 ) {
12678 *lex_mode->as.heredoc.common_whitespace = whitespace;
12679 }
12680 }
12681
12682 // Otherwise we'll be parsing string content. These are the places
12683 // where we need to split up the content of the heredoc. We'll use
12684 // strpbrk to find the first of these characters.
12685 uint8_t breakpoints[] = "\r\n\\#";
12686
12687 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12688 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12689 breakpoints[3] = '\0';
12690 }
12691
12692 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12693 pm_token_buffer_t token_buffer = { 0 };
12694 bool was_line_continuation = false;
12695
12696 while (breakpoint != NULL) {
12697 switch (*breakpoint) {
12698 case '\0':
12699 // Skip directly past the null character.
12700 parser->current.end = breakpoint + 1;
12701 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12702 break;
12703 case '\r':
12704 parser->current.end = breakpoint + 1;
12705
12706 if (peek_at(parser, breakpoint + 1) != '\n') {
12707 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12708 break;
12709 }
12710
12711 // If we hit a \r\n sequence, then we want to replace it
12712 // with a single \n character in the final string.
12713 breakpoint++;
12714 pm_token_buffer_escape(parser, &token_buffer);
12715 token_buffer.cursor = breakpoint;
12716
12718 case '\n': {
12719 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12720 parser_flush_heredoc_end(parser);
12721 parser->current.end = breakpoint + 1;
12722 pm_token_buffer_flush(parser, &token_buffer);
12723 LEX(PM_TOKEN_STRING_CONTENT);
12724 }
12725
12726 pm_newline_list_append(&parser->newline_list, breakpoint);
12727
12728 // If we have a - or ~ heredoc, then we can match after
12729 // some leading whitespace.
12730 const uint8_t *start = breakpoint + 1;
12731
12732 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12733 // We want to match the terminator starting from the end of the line in case
12734 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12735 const uint8_t *newline = next_newline(start, parser->end - start);
12736
12737 if (newline == NULL) {
12738 newline = parser->end;
12739 } else if (newline[-1] == '\r') {
12740 newline--; // Remove \r
12741 }
12742
12743 // Start of a possible terminator.
12744 const uint8_t *terminator_start = newline - ident_length;
12745
12746 // Cursor to check for the leading whitespace. We skip the
12747 // leading whitespace if we have a - or ~ heredoc.
12748 const uint8_t *cursor = start;
12749
12750 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12751 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12752 cursor++;
12753 }
12754 }
12755
12756 if (
12757 cursor == terminator_start &&
12758 (memcmp(terminator_start, ident_start, ident_length) == 0)
12759 ) {
12760 parser->current.end = breakpoint + 1;
12761 pm_token_buffer_flush(parser, &token_buffer);
12762 LEX(PM_TOKEN_STRING_CONTENT);
12763 }
12764 }
12765
12766 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12767
12768 // If we have hit a newline that is followed by a valid
12769 // terminator, then we need to return the content of the
12770 // heredoc here as string content. Then, the next time a
12771 // token is lexed, it will match again and return the
12772 // end of the heredoc.
12773 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12774 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12775 *lex_mode->as.heredoc.common_whitespace = whitespace;
12776 }
12777
12778 parser->current.end = breakpoint + 1;
12779 pm_token_buffer_flush(parser, &token_buffer);
12780 LEX(PM_TOKEN_STRING_CONTENT);
12781 }
12782
12783 // Otherwise we hit a newline and it wasn't followed by
12784 // a terminator, so we can continue parsing.
12785 parser->current.end = breakpoint + 1;
12786 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12787 break;
12788 }
12789 case '\\': {
12790 // If we hit an escape, then we need to skip past
12791 // however many characters the escape takes up. However
12792 // it's important that if \n or \r\n are escaped, we
12793 // stop looping before the newline and not after the
12794 // newline so that we can still potentially find the
12795 // terminator of the heredoc.
12796 parser->current.end = breakpoint + 1;
12797
12798 // If we've hit the end of the file, then break out of
12799 // the loop by setting the breakpoint to NULL.
12800 if (parser->current.end == parser->end) {
12801 breakpoint = NULL;
12802 continue;
12803 }
12804
12805 pm_token_buffer_escape(parser, &token_buffer);
12806 uint8_t peeked = peek(parser);
12807
12808 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12809 switch (peeked) {
12810 case '\r':
12811 parser->current.end++;
12812 if (peek(parser) != '\n') {
12813 pm_token_buffer_push_byte(&token_buffer, '\\');
12814 pm_token_buffer_push_byte(&token_buffer, '\r');
12815 break;
12816 }
12818 case '\n':
12819 pm_token_buffer_push_byte(&token_buffer, '\\');
12820 pm_token_buffer_push_byte(&token_buffer, '\n');
12821 token_buffer.cursor = parser->current.end + 1;
12822 breakpoint = parser->current.end;
12823 continue;
12824 default:
12825 pm_token_buffer_push_byte(&token_buffer, '\\');
12826 pm_token_buffer_push_escaped(&token_buffer, parser);
12827 break;
12828 }
12829 } else {
12830 switch (peeked) {
12831 case '\r':
12832 parser->current.end++;
12833 if (peek(parser) != '\n') {
12834 pm_token_buffer_push_byte(&token_buffer, '\r');
12835 break;
12836 }
12838 case '\n':
12839 // If we are in a tilde here, we should
12840 // break out of the loop and return the
12841 // string content.
12842 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12843 const uint8_t *end = parser->current.end;
12844 pm_newline_list_append(&parser->newline_list, end);
12845
12846 // Here we want the buffer to only
12847 // include up to the backslash.
12848 parser->current.end = breakpoint;
12849 pm_token_buffer_flush(parser, &token_buffer);
12850
12851 // Now we can advance the end of the
12852 // token past the newline.
12853 parser->current.end = end + 1;
12854 lex_mode->as.heredoc.line_continuation = true;
12855 LEX(PM_TOKEN_STRING_CONTENT);
12856 }
12857
12858 was_line_continuation = true;
12859 token_buffer.cursor = parser->current.end + 1;
12860 breakpoint = parser->current.end;
12861 continue;
12862 default:
12863 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12864 break;
12865 }
12866 }
12867
12868 token_buffer.cursor = parser->current.end;
12869 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12870 break;
12871 }
12872 case '#': {
12873 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12874
12875 if (type == PM_TOKEN_NOT_PROVIDED) {
12876 // If we haven't returned at this point then we had
12877 // something that looked like an interpolated class
12878 // or instance variable like "#@" but wasn't
12879 // actually. In this case we'll just skip to the
12880 // next breakpoint.
12881 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12882 break;
12883 }
12884
12885 if (type == PM_TOKEN_STRING_CONTENT) {
12886 pm_token_buffer_flush(parser, &token_buffer);
12887 }
12888
12889 LEX(type);
12890 }
12891 default:
12892 assert(false && "unreachable");
12893 }
12894
12895 was_line_continuation = false;
12896 }
12897
12898 if (parser->current.end > parser->current.start) {
12899 parser->current.end = parser->end;
12900 pm_token_buffer_flush(parser, &token_buffer);
12901 LEX(PM_TOKEN_STRING_CONTENT);
12902 }
12903
12904 // If we've hit the end of the string, then this is an unterminated
12905 // heredoc. In that case we'll return a string content token.
12906 parser->current.end = parser->end;
12907 pm_token_buffer_flush(parser, &token_buffer);
12908 LEX(PM_TOKEN_STRING_CONTENT);
12909 }
12910 }
12911
12912 assert(false && "unreachable");
12913}
12914
12915#undef LEX
12916
12917/******************************************************************************/
12918/* Parse functions */
12919/******************************************************************************/
12920
/**
 * The binding power levels used when parsing expressions, listed from the
 * loosest binding (statements) to the tightest (method calls).
 *
 * Every level is an even number. The odd value directly above a level is left
 * unused here so that an operator can use `level + 1` as its right binding
 * power (see the LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializers).
 */
typedef enum {
    /* Marks a token that cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET            =  0,

    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4, /* rescue */
    PM_BINDING_POWER_MODIFIER         =  6, /* if unless until while */
    PM_BINDING_POWER_COMPOSITION      =  8, /* and or */
    PM_BINDING_POWER_NOT              = 10, /* not */
    PM_BINDING_POWER_MATCH            = 12, /* => in */
    PM_BINDING_POWER_DEFINED          = 14, /* defined? */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, /* = */
    PM_BINDING_POWER_ASSIGNMENT       = 18, /* = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_TERNARY          = 20, /* ?: */
    PM_BINDING_POWER_RANGE            = 22, /* .. ... */
    PM_BINDING_POWER_LOGICAL_OR       = 24, /* || */
    PM_BINDING_POWER_LOGICAL_AND      = 26, /* && */
    PM_BINDING_POWER_EQUALITY         = 28, /* <=> == === != =~ !~ */
    PM_BINDING_POWER_COMPARISON       = 30, /* > >= < <= */
    PM_BINDING_POWER_BITWISE_OR       = 32, /* | ^ */
    PM_BINDING_POWER_BITWISE_AND      = 34, /* & */
    PM_BINDING_POWER_SHIFT            = 36, /* << >> */
    PM_BINDING_POWER_TERM             = 38, /* + - */
    PM_BINDING_POWER_FACTOR           = 40, /* * / % */
    PM_BINDING_POWER_UMINUS           = 42, /* -@ */
    PM_BINDING_POWER_EXPONENT         = 44, /* ** */
    PM_BINDING_POWER_UNARY            = 46, /* ! ~ +@ */
    PM_BINDING_POWER_INDEX            = 48, /* [] []= */
    PM_BINDING_POWER_CALL             = 50, /* :: . */

    /* Greater than every level above. */
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
12958
12963typedef struct {
12965 pm_binding_power_t left;
12966
12968 pm_binding_power_t right;
12969
12972
12979
/*
 * Shorthand initializers for entries of the binding power table. Each one
 * expands to a brace initializer holding four values: the left binding power,
 * the right binding power, and two boolean flags. The third value is false
 * only for RIGHT_ASSOCIATIVE_UNARY; the fourth is true only for
 * NON_ASSOCIATIVE.
 *
 * The macro parameter is parenthesized at every use so that an argument
 * containing an operator that binds more loosely than `+` still produces
 * `argument + 1` rather than regrouping.
 */

/* Assignment operators: left power is UNARY, right power is ASSIGNMENT. */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

/* Right power is one above the left power. */
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

/* Right power equals the left power. */
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

/* Same powers as LEFT_ASSOCIATIVE, with the fourth flag set. */
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

/* Same powers as RIGHT_ASSOCIATIVE, with the third flag cleared. */
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12985
12986pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12987 // rescue
12988 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12989
12990 // if unless until while
12991 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12992 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12993 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12994 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12995
12996 // and or
12997 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12998 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12999
13000 // => in
13001 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13002 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
13003
13004 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
13005 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13006 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
13007 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
13008 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
13009 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
13010 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13011 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13012 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
13013 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13014 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
13015 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
13016 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
13017 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13018 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
13019
13020 // ?:
13021 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13022
13023 // .. ...
13024 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13025 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13026 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13027 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13028
13029 // ||
13030 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13031
13032 // &&
13033 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13034
13035 // != !~ == === =~ <=>
13036 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13037 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13038 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13039 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13040 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13041 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13042
13043 // > >= < <=
13044 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13045 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13046 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13047 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13048
13049 // ^ |
13050 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13051 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13052
13053 // &
13054 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13055
13056 // >> <<
13057 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13058 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13059
13060 // - +
13061 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13062 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13063
13064 // % / *
13065 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13066 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13067 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13068 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13069
13070 // -@
13071 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13072 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13073
13074 // **
13075 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13076 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13077
13078 // ! ~ +@
13079 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13080 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13081 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13082
13083 // [
13084 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13085
13086 // :: . &.
13087 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13088 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13089 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13090};
13091
// The shorthand initializer macros are only meant for the binding power table,
// so remove every one of them, including NON_ASSOCIATIVE, to keep them from
// leaking into the rest of the file.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13096
13100static inline bool
13101match1(const pm_parser_t *parser, pm_token_type_t type) {
13102 return parser->current.type == type;
13103}
13104
13108static inline bool
13109match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13110 return match1(parser, type1) || match1(parser, type2);
13111}
13112
13116static inline bool
13117match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13118 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13119}
13120
13124static inline bool
13125match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13126 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13127}
13128
13132static inline bool
13133match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13134 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13135}
13136
13140static inline bool
13141match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13142 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13143}
13144
13151static bool
13152accept1(pm_parser_t *parser, pm_token_type_t type) {
13153 if (match1(parser, type)) {
13154 parser_lex(parser);
13155 return true;
13156 }
13157 return false;
13158}
13159
13164static inline bool
13165accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13166 if (match2(parser, type1, type2)) {
13167 parser_lex(parser);
13168 return true;
13169 }
13170 return false;
13171}
13172
13184static void
13185expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13186 if (accept1(parser, type)) return;
13187
13188 const uint8_t *location = parser->previous.end;
13189 pm_parser_err(parser, location, location, diag_id);
13190
13191 parser->previous.start = location;
13192 parser->previous.type = PM_TOKEN_MISSING;
13193}
13194
13199static void
13200expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13201 if (accept2(parser, type1, type2)) return;
13202
13203 const uint8_t *location = parser->previous.end;
13204 pm_parser_err(parser, location, location, diag_id);
13205
13206 parser->previous.start = location;
13207 parser->previous.type = PM_TOKEN_MISSING;
13208}
13209
13214static void
13215expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13216 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13217 parser_lex(parser);
13218 } else {
13219 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13220 parser->previous.start = parser->previous.end;
13221 parser->previous.type = PM_TOKEN_MISSING;
13222 }
13223}
13224
// Forward declaration: the helpers that follow call parse_expression before
// its definition (presumably further down in this file) has been seen.
static pm_node_t *
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13227
13232static pm_node_t *
13233parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13234 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13235 pm_assert_value_expression(parser, node);
13236 return node;
13237}
13238
13257static inline bool
13258token_begins_expression_p(pm_token_type_t type) {
13259 switch (type) {
13260 case PM_TOKEN_EQUAL_GREATER:
13261 case PM_TOKEN_KEYWORD_IN:
13262 // We need to special case this because it is a binary operator that
13263 // should not be marked as beginning an expression.
13264 return false;
13265 case PM_TOKEN_BRACE_RIGHT:
13266 case PM_TOKEN_BRACKET_RIGHT:
13267 case PM_TOKEN_COLON:
13268 case PM_TOKEN_COMMA:
13269 case PM_TOKEN_EMBEXPR_END:
13270 case PM_TOKEN_EOF:
13271 case PM_TOKEN_LAMBDA_BEGIN:
13272 case PM_TOKEN_KEYWORD_DO:
13273 case PM_TOKEN_KEYWORD_DO_LOOP:
13274 case PM_TOKEN_KEYWORD_END:
13275 case PM_TOKEN_KEYWORD_ELSE:
13276 case PM_TOKEN_KEYWORD_ELSIF:
13277 case PM_TOKEN_KEYWORD_ENSURE:
13278 case PM_TOKEN_KEYWORD_THEN:
13279 case PM_TOKEN_KEYWORD_RESCUE:
13280 case PM_TOKEN_KEYWORD_WHEN:
13281 case PM_TOKEN_NEWLINE:
13282 case PM_TOKEN_PARENTHESIS_RIGHT:
13283 case PM_TOKEN_SEMICOLON:
13284 // The reason we need this short-circuit is because we're using the
13285 // binding powers table to tell us if the subsequent token could
13286 // potentially be the start of an expression. If there _is_ a binding
13287 // power for one of these tokens, then we should remove it from this list
13288 // and let it be handled by the default case below.
13289 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13290 return false;
13291 case PM_TOKEN_UAMPERSAND:
13292 // This is a special case because this unary operator cannot appear
13293 // as a general operator, it only appears in certain circumstances.
13294 return false;
13295 case PM_TOKEN_UCOLON_COLON:
13296 case PM_TOKEN_UMINUS:
13297 case PM_TOKEN_UMINUS_NUM:
13298 case PM_TOKEN_UPLUS:
13299 case PM_TOKEN_BANG:
13300 case PM_TOKEN_TILDE:
13301 case PM_TOKEN_UDOT_DOT:
13302 case PM_TOKEN_UDOT_DOT_DOT:
13303 // These unary tokens actually do have binding power associated with them
13304 // so that we can correctly place them into the precedence order. But we
13305 // want them to be marked as beginning an expression, so we need to
13306 // special case them here.
13307 return true;
13308 default:
13309 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13310 }
13311}
13312
13317static pm_node_t *
13318parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13319 if (accept1(parser, PM_TOKEN_USTAR)) {
13320 pm_token_t operator = parser->previous;
13321 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13322 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13323 }
13324
13325 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13326}
13327
13332static void
13333parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13334 // The method name needs to change. If we previously had
13335 // foo, we now need foo=. In this case we'll allocate a new
13336 // owned string, copy the previous method name in, and
13337 // append an =.
13338 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13339 size_t length = constant->length;
13340 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13341 if (name == NULL) return;
13342
13343 memcpy(name, constant->start, length);
13344 name[length] = '=';
13345
13346 // Now switch the name to the new string.
13347 // This silences clang analyzer warning about leak of memory pointed by `name`.
13348 // NOLINTNEXTLINE(clang-analyzer-*)
13349 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13350}
13351
13358static pm_node_t *
13359parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13360 switch (PM_NODE_TYPE(target)) {
13361 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13362 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13363 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13364 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13365 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13366 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13367 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13368 default: break;
13369 }
13370
13371 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13372 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13373
13374 pm_node_destroy(parser, target);
13375 return (pm_node_t *) result;
13376}
13377
13383static void
13384parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13385 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13386
13387 for (size_t index = 0; index < implicit_parameters->size; index++) {
13388 if (implicit_parameters->nodes[index] == node) {
13389 // If the node is not the last one in the list, we need to shift the
13390 // remaining nodes down to fill the gap. This is extremely unlikely
13391 // to happen.
13392 if (index != implicit_parameters->size - 1) {
13393 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13394 }
13395
13396 implicit_parameters->size--;
13397 break;
13398 }
13399 }
13400}
13401
13410static pm_node_t *
13411parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13412 switch (PM_NODE_TYPE(target)) {
13413 case PM_MISSING_NODE:
13414 return target;
13415 case PM_SOURCE_ENCODING_NODE:
13416 case PM_FALSE_NODE:
13417 case PM_SOURCE_FILE_NODE:
13418 case PM_SOURCE_LINE_NODE:
13419 case PM_NIL_NODE:
13420 case PM_SELF_NODE:
13421 case PM_TRUE_NODE: {
13422 // In these special cases, we have specific error messages and we
13423 // will replace them with local variable writes.
13424 return parse_unwriteable_target(parser, target);
13425 }
13426 case PM_CLASS_VARIABLE_READ_NODE:
13428 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
13429 return target;
13430 case PM_CONSTANT_PATH_NODE:
13431 if (context_def_p(parser)) {
13432 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13433 }
13434
13436 target->type = PM_CONSTANT_PATH_TARGET_NODE;
13437
13438 return target;
13439 case PM_CONSTANT_READ_NODE:
13440 if (context_def_p(parser)) {
13441 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13442 }
13443
13444 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13445 target->type = PM_CONSTANT_TARGET_NODE;
13446
13447 return target;
13448 case PM_BACK_REFERENCE_READ_NODE:
13449 case PM_NUMBERED_REFERENCE_READ_NODE:
13450 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13451 return target;
13452 case PM_GLOBAL_VARIABLE_READ_NODE:
13454 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13455 return target;
13456 case PM_LOCAL_VARIABLE_READ_NODE: {
13457 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13458 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13459 parse_target_implicit_parameter(parser, target);
13460 }
13461
13462 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13463 uint32_t name = cast->name;
13464 uint32_t depth = cast->depth;
13465 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13466
13468 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13469
13470 return target;
13471 }
13472 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13473 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13474 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13475
13476 parse_target_implicit_parameter(parser, target);
13477 pm_node_destroy(parser, target);
13478
13479 return node;
13480 }
13481 case PM_INSTANCE_VARIABLE_READ_NODE:
13483 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13484 return target;
13485 case PM_MULTI_TARGET_NODE:
13486 if (splat_parent) {
13487 // Multi target is not accepted in all positions. If this is one
13488 // of them, then we need to add an error.
13489 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13490 }
13491
13492 return target;
13493 case PM_SPLAT_NODE: {
13494 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13495
13496 if (splat->expression != NULL) {
13497 splat->expression = parse_target(parser, splat->expression, multiple, true);
13498 }
13499
13500 return (pm_node_t *) splat;
13501 }
13502 case PM_CALL_NODE: {
13503 pm_call_node_t *call = (pm_call_node_t *) target;
13504
13505 // If we have no arguments to the call node and we need this to be a
13506 // target then this is either a method call or a local variable
13507 // write.
13508 if (
13509 (call->message_loc.start != NULL) &&
13510 (call->message_loc.end[-1] != '!') &&
13511 (call->message_loc.end[-1] != '?') &&
13512 (call->opening_loc.start == NULL) &&
13513 (call->arguments == NULL) &&
13514 (call->block == NULL)
13515 ) {
13516 if (call->receiver == NULL) {
13517 // When we get here, we have a local variable write, because it
13518 // was previously marked as a method call but now we have an =.
13519 // This looks like:
13520 //
13521 // foo = 1
13522 //
13523 // When it was parsed in the prefix position, foo was seen as a
13524 // method call with no receiver and no arguments. Now we have an
13525 // =, so we know it's a local variable write.
13526 const pm_location_t message_loc = call->message_loc;
13527
13528 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13529 pm_node_destroy(parser, target);
13530
13531 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13532 }
13533
13534 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13535 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13536 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13537 }
13538
13539 parse_write_name(parser, &call->name);
13540 return (pm_node_t *) pm_call_target_node_create(parser, call);
13541 }
13542 }
13543
13544 // If there is no call operator and the message is "[]" then this is
13545 // an aref expression, and we can transform it into an aset
13546 // expression.
13547 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13548 return (pm_node_t *) pm_index_target_node_create(parser, call);
13549 }
13550 }
13552 default:
13553 // In this case we have a node that we don't know how to convert
13554 // into a target. We need to treat it as an error. For now, we'll
13555 // mark it as an error and just skip right past it.
13556 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13557 return target;
13558 }
13559}
13560
13565static pm_node_t *
13566parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13567 pm_node_t *result = parse_target(parser, target, multiple, false);
13568
13569 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13570 // parens after the targets.
13571 if (
13572 !match1(parser, PM_TOKEN_EQUAL) &&
13573 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13574 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13575 ) {
13576 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13577 }
13578
13579 return result;
13580}
13581
13586static pm_node_t *
13587parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13588 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13589
13590 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13591 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13592 }
13593
13594 return write;
13595}
13596
13600static pm_node_t *
13601parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13602 switch (PM_NODE_TYPE(target)) {
13603 case PM_MISSING_NODE:
13604 pm_node_destroy(parser, value);
13605 return target;
13606 case PM_CLASS_VARIABLE_READ_NODE: {
13607 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13608 pm_node_destroy(parser, target);
13609 return (pm_node_t *) node;
13610 }
13611 case PM_CONSTANT_PATH_NODE: {
13612 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13613
13614 if (context_def_p(parser)) {
13615 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13616 }
13617
13618 return parse_shareable_constant_write(parser, node);
13619 }
13620 case PM_CONSTANT_READ_NODE: {
13621 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13622
13623 if (context_def_p(parser)) {
13624 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13625 }
13626
13627 pm_node_destroy(parser, target);
13628 return parse_shareable_constant_write(parser, node);
13629 }
13630 case PM_BACK_REFERENCE_READ_NODE:
13631 case PM_NUMBERED_REFERENCE_READ_NODE:
13632 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13634 case PM_GLOBAL_VARIABLE_READ_NODE: {
13635 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13636 pm_node_destroy(parser, target);
13637 return (pm_node_t *) node;
13638 }
13639 case PM_LOCAL_VARIABLE_READ_NODE: {
13641
13642 pm_constant_id_t name = local_read->name;
13643 pm_location_t name_loc = target->location;
13644
13645 uint32_t depth = local_read->depth;
13646 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13647
13648 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13649 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13650 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13651 parse_target_implicit_parameter(parser, target);
13652 }
13653
13654 pm_locals_unread(&scope->locals, name);
13655 pm_node_destroy(parser, target);
13656
13657 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13658 }
13659 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13660 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13661 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13662
13663 parse_target_implicit_parameter(parser, target);
13664 pm_node_destroy(parser, target);
13665
13666 return node;
13667 }
13668 case PM_INSTANCE_VARIABLE_READ_NODE: {
13669 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13670 pm_node_destroy(parser, target);
13671 return write_node;
13672 }
13673 case PM_MULTI_TARGET_NODE:
13674 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13675 case PM_SPLAT_NODE: {
13676 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13677
13678 if (splat->expression != NULL) {
13679 splat->expression = parse_write(parser, splat->expression, operator, value);
13680 }
13681
13682 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13683 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13684
13685 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13686 }
13687 case PM_CALL_NODE: {
13688 pm_call_node_t *call = (pm_call_node_t *) target;
13689
13690 // If we have no arguments to the call node and we need this to be a
13691 // target then this is either a method call or a local variable
13692 // write.
13693 if (
13694 (call->message_loc.start != NULL) &&
13695 (call->message_loc.end[-1] != '!') &&
13696 (call->message_loc.end[-1] != '?') &&
13697 (call->opening_loc.start == NULL) &&
13698 (call->arguments == NULL) &&
13699 (call->block == NULL)
13700 ) {
13701 if (call->receiver == NULL) {
13702 // When we get here, we have a local variable write, because it
13703 // was previously marked as a method call but now we have an =.
13704 // This looks like:
13705 //
13706 // foo = 1
13707 //
13708 // When it was parsed in the prefix position, foo was seen as a
13709 // method call with no receiver and no arguments. Now we have an
13710 // =, so we know it's a local variable write.
13711 const pm_location_t message = call->message_loc;
13712
13713 pm_parser_local_add_location(parser, message.start, message.end, 0);
13714 pm_node_destroy(parser, target);
13715
13716 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13717 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13718
13719 pm_refute_numbered_parameter(parser, message.start, message.end);
13720 return target;
13721 }
13722
13723 if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
13724 // When we get here, we have a method call, because it was
13725 // previously marked as a method call but now we have an =. This
13726 // looks like:
13727 //
13728 // foo.bar = 1
13729 //
13730 // When it was parsed in the prefix position, foo.bar was seen as a
13731 // method call with no arguments. Now we have an =, so we know it's
13732 // a method call with an argument. In this case we will create the
13733 // arguments node, parse the argument, and add it to the list.
13734 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13735 call->arguments = arguments;
13736
13737 pm_arguments_node_arguments_append(arguments, value);
13738 call->base.location.end = arguments->base.location.end;
13739
13740 parse_write_name(parser, &call->name);
13741 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13742
13743 return (pm_node_t *) call;
13744 }
13745 }
13746
13747 // If there is no call operator and the message is "[]" then this is
13748 // an aref expression, and we can transform it into an aset
13749 // expression.
13750 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13751 if (call->arguments == NULL) {
13752 call->arguments = pm_arguments_node_create(parser);
13753 }
13754
13755 pm_arguments_node_arguments_append(call->arguments, value);
13756 target->location.end = value->location.end;
13757
13758 // Replace the name with "[]=".
13759 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13760
13761 // Ensure that the arguments for []= don't contain keywords
13762 pm_index_arguments_check(parser, call->arguments, call->block);
13763 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13764
13765 return target;
13766 }
13767
13768 // If there are arguments on the call node, then it can't be a method
13769 // call ending with = or a local variable write, so it must be a
13770 // syntax error. In this case we'll fall through to our default
13771 // handling. We need to free the value that we parsed because there
13772 // is no way for us to attach it to the tree at this point.
13773 pm_node_destroy(parser, value);
13774 }
13776 default:
13777 // In this case we have a node that we don't know how to convert into a
13778 // target. We need to treat it as an error. For now, we'll mark it as an
13779 // error and just skip right past it.
13780 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13781 return target;
13782 }
13783}
13784
13791static pm_node_t *
13792parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13793 switch (PM_NODE_TYPE(target)) {
13794 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13795 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13796 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13797 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13798 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13799 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13800 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13801 default: break;
13802 }
13803
13804 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13805 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13806
13807 pm_node_destroy(parser, target);
13808 return (pm_node_t *) result;
13809}
13810
13821static pm_node_t *
13822parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13823 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13824
13825 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13826 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13827
13828 while (accept1(parser, PM_TOKEN_COMMA)) {
13829 if (accept1(parser, PM_TOKEN_USTAR)) {
13830 // Here we have a splat operator. It can have a name or be
13831 // anonymous. It can be the final target or be in the middle if
13832 // there haven't been any others yet.
13833 if (has_rest) {
13834 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13835 }
13836
13837 pm_token_t star_operator = parser->previous;
13838 pm_node_t *name = NULL;
13839
13840 if (token_begins_expression_p(parser->current.type)) {
13841 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13842 name = parse_target(parser, name, true, true);
13843 }
13844
13845 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13846 pm_multi_target_node_targets_append(parser, result, splat);
13847 has_rest = true;
13848 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13849 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13850 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13851 target = parse_target(parser, target, true, false);
13852
13853 pm_multi_target_node_targets_append(parser, result, target);
13854 context_pop(parser);
13855 } else if (token_begins_expression_p(parser->current.type)) {
13856 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13857 target = parse_target(parser, target, true, false);
13858
13859 pm_multi_target_node_targets_append(parser, result, target);
13860 } else if (!match1(parser, PM_TOKEN_EOF)) {
13861 // If we get here, then we have a trailing , in a multi target node.
13862 // We'll add an implicit rest node to represent this.
13863 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13864 pm_multi_target_node_targets_append(parser, result, rest);
13865 break;
13866 }
13867 }
13868
13869 return (pm_node_t *) result;
13870}
13871
13876static pm_node_t *
13877parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13878 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13879 accept1(parser, PM_TOKEN_NEWLINE);
13880
13881 // Ensure that we have either an = or a ) after the targets.
13882 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13883 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13884 }
13885
13886 return result;
13887}
13888
13892static pm_statements_node_t *
13893parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13894 // First, skip past any optional terminators that might be at the beginning
13895 // of the statements.
13896 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13897
13898 // If we have a terminator, then we can just return NULL.
13899 if (context_terminator(context, &parser->current)) return NULL;
13900
13901 pm_statements_node_t *statements = pm_statements_node_create(parser);
13902
13903 // At this point we know we have at least one statement, and that it
13904 // immediately follows the current token.
13905 context_push(parser, context);
13906
13907 while (true) {
13908 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13909 pm_statements_node_body_append(parser, statements, node, true);
13910
13911 // If we're recovering from a syntax error, then we need to stop parsing
13912 // the statements now.
13913 if (parser->recovering) {
13914 // If this is the level of context where the recovery has happened,
13915 // then we can mark the parser as done recovering.
13916 if (context_terminator(context, &parser->current)) parser->recovering = false;
13917 break;
13918 }
13919
13920 // If we have a terminator, then we will parse all consecutive
13921 // terminators and then continue parsing the statements list.
13922 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13923 // If we have a terminator, then we will continue parsing the
13924 // statements list.
13925 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13926 if (context_terminator(context, &parser->current)) break;
13927
13928 // Now we can continue parsing the list of statements.
13929 continue;
13930 }
13931
13932 // At this point we have a list of statements that are not terminated by
13933 // a newline or semicolon. At this point we need to check if we're at
13934 // the end of the statements list. If we are, then we should break out
13935 // of the loop.
13936 if (context_terminator(context, &parser->current)) break;
13937
13938 // At this point, we have a syntax error, because the statement was not
13939 // terminated by a newline or semicolon, and we're not at the end of the
13940 // statements list. Ideally we should scan forward to determine if we
13941 // should insert a missing terminator or break out of parsing the
13942 // statements list at this point.
13943 //
13944 // We don't have that yet, so instead we'll do a more naive approach. If
13945 // we were unable to parse an expression, then we will skip past this
13946 // token and continue parsing the statements list. Otherwise we'll add
13947 // an error and continue parsing the statements list.
13948 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13949 parser_lex(parser);
13950
13951 // If we are at the end of the file, then we need to stop parsing
13952 // the statements entirely at this point. Mark the parser as
13953 // recovering, as we know that EOF closes the top-level context, and
13954 // then break out of the loop.
13955 if (match1(parser, PM_TOKEN_EOF)) {
13956 parser->recovering = true;
13957 break;
13958 }
13959
13960 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13961 if (context_terminator(context, &parser->current)) break;
13962 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13963 // This is an inlined version of accept1 because the error that we
13964 // want to add has varargs. If this happens again, we should
13965 // probably extract a helper function.
13966 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13967 parser->previous.start = parser->previous.end;
13968 parser->previous.type = PM_TOKEN_MISSING;
13969 }
13970 }
13971
13972 context_pop(parser);
13973 bool last_value = true;
13974 switch (context) {
13977 last_value = false;
13978 break;
13979 default:
13980 break;
13981 }
13982 pm_void_statements_check(parser, statements, last_value);
13983
13984 return statements;
13985}
13986
13991static void
13992pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13993 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13994
13995 if (duplicated != NULL) {
13996 pm_buffer_t buffer = { 0 };
13997 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13998
13999 pm_diagnostic_list_append_format(
14000 &parser->warning_list,
14001 duplicated->location.start,
14002 duplicated->location.end,
14003 PM_WARN_DUPLICATED_HASH_KEY,
14004 (int) pm_buffer_length(&buffer),
14005 pm_buffer_value(&buffer),
14006 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
14007 );
14008
14009 pm_buffer_free(&buffer);
14010 }
14011}
14012
14017static void
14018pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14019 pm_node_t *previous;
14020
14021 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14022 pm_diagnostic_list_append_format(
14023 &parser->warning_list,
14024 node->location.start,
14025 node->location.end,
14026 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14027 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14028 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14029 );
14030 }
14031}
14032
14036static bool
14037parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14038 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
14039 bool contains_keyword_splat = false;
14040
14041 while (true) {
14042 pm_node_t *element;
14043
14044 switch (parser->current.type) {
14045 case PM_TOKEN_USTAR_STAR: {
14046 parser_lex(parser);
14047 pm_token_t operator = parser->previous;
14048 pm_node_t *value = NULL;
14049
14050 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14051 // If we're about to parse a nested hash that is being
14052 // pushed into this hash directly with **, then we want the
14053 // inner hash to share the static literals with the outer
14054 // hash.
14055 parser->current_hash_keys = literals;
14056 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14057 } else if (token_begins_expression_p(parser->current.type)) {
14058 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14059 } else {
14060 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14061 }
14062
14063 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14064 contains_keyword_splat = true;
14065 break;
14066 }
14067 case PM_TOKEN_LABEL: {
14068 pm_token_t label = parser->current;
14069 parser_lex(parser);
14070
14071 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14072 pm_hash_key_static_literals_add(parser, literals, key);
14073
14074 pm_token_t operator = not_provided(parser);
14075 pm_node_t *value = NULL;
14076
14077 if (token_begins_expression_p(parser->current.type)) {
14078 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14079 } else {
14080 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14081 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14082 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14083 } else {
14084 int depth = -1;
14085 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14086
14087 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14088 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14089 } else {
14090 depth = pm_parser_local_depth(parser, &identifier);
14091 }
14092
14093 if (depth == -1) {
14094 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14095 } else {
14096 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14097 }
14098 }
14099
14100 value->location.end++;
14101 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14102 }
14103
14104 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14105 break;
14106 }
14107 default: {
14108 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14109
14110 // Hash keys that are strings are automatically frozen. We will
14111 // mark that here.
14112 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14113 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14114 }
14115
14116 pm_hash_key_static_literals_add(parser, literals, key);
14117
14118 pm_token_t operator;
14119 if (pm_symbol_node_label_p(key)) {
14120 operator = not_provided(parser);
14121 } else {
14122 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14123 operator = parser->previous;
14124 }
14125
14126 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14127 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14128 break;
14129 }
14130 }
14131
14132 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14133 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14134 } else {
14135 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14136 }
14137
14138 // If there's no comma after the element, then we're done.
14139 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14140
14141 // If the next element starts with a label or a **, then we know we have
14142 // another element in the hash, so we'll continue parsing.
14143 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14144
14145 // Otherwise we need to check if the subsequent token begins an expression.
14146 // If it does, then we'll continue parsing.
14147 if (token_begins_expression_p(parser->current.type)) continue;
14148
14149 // Otherwise by default we will exit out of this loop.
14150 break;
14151 }
14152
14153 return contains_keyword_splat;
14154}
14155
14159static inline void
14160parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14161 if (arguments->arguments == NULL) {
14162 arguments->arguments = pm_arguments_node_create(parser);
14163 }
14164
14165 pm_arguments_node_arguments_append(arguments->arguments, argument);
14166}
14167
/**
 * Parse a comma-separated list of call arguments and store them in
 * `arguments`.
 *
 * parser:             the parser whose token stream is consumed.
 * arguments:          receives the parsed arguments. Most arguments are added
 *                     through parse_arguments_append; the first `&` block
 *                     argument is stored in `arguments->block` instead.
 * accepts_forwarding: when true, a leading `...` that is not followed by an
 *                     expression is parsed as a forwarding-arguments node.
 * terminator:         the token type that closes the list. PM_TOKEN_EOF means
 *                     there is no specific closing token, in which case
 *                     newlines are not skipped between arguments.
 * depth:              current nesting depth; nested parse calls receive
 *                     depth + 1.
 *
 * The function returns without touching `arguments` when the current token
 * cannot start an argument.
 */
14171static void
14172parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14173 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14174
14175 // First we need to check if the next token is one that could be the start
14176 // of an argument. If it's not, then we can just return.
14177 if (
14178 match2(parser, terminator, PM_TOKEN_EOF) ||
14179 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14180 context_terminator(parser->current_context->context, &parser->current)
14181 ) {
14182 return;
14183 }
14184
// State carried across loop iterations: whether any argument has been parsed
// yet, and whether a bare hash, a block argument, or `...` forwarding has
// already been seen (each is used below to report ordering errors).
14185 bool parsed_first_argument = false;
14186 bool parsed_bare_hash = false;
14187 bool parsed_block_argument = false;
14188 bool parsed_forwarding_arguments = false;
14189
14190 while (!match1(parser, PM_TOKEN_EOF)) {
// Anything that follows a forwarding `...` is reported as an error, but
// parsing still continues.
14191 if (parsed_forwarding_arguments) {
14192 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14193 }
14194
14195 pm_node_t *argument = NULL;
14196
14197 switch (parser->current.type) {
// A `**` or a label starts a bare keyword hash; the remaining pairs are
// collected by parse_assocs.
14198 case PM_TOKEN_USTAR_STAR:
14199 case PM_TOKEN_LABEL: {
14200 if (parsed_bare_hash) {
14201 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14202 }
14203
14204 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14205 argument = (pm_node_t *) hash;
14206
14207 pm_static_literals_t hash_keys = { 0 };
14208 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14209
14210 parse_arguments_append(parser, arguments, argument);
14211
14212 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14213 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14214 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14215
14216 pm_static_literals_free(&hash_keys);
14217 parsed_bare_hash = true;
14218
14219 break;
14220 }
// A `&` block argument, with or without an expression after it.
14221 case PM_TOKEN_UAMPERSAND: {
14222 parser_lex(parser);
14223 pm_token_t operator = parser->previous;
14224 pm_node_t *expression = NULL;
14225
14226 if (token_begins_expression_p(parser->current.type)) {
14227 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14228 } else {
14229 pm_parser_scope_forwarding_block_check(parser, &operator);
14230 }
14231
14232 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
// Only the first block argument goes into arguments->block; any later
// one is appended to the regular argument list instead.
14233 if (parsed_block_argument) {
14234 parse_arguments_append(parser, arguments, argument);
14235 } else {
14236 arguments->block = argument;
14237 }
14238
14239 if (match1(parser, PM_TOKEN_COMMA)) {
14240 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14241 }
14242
14243 parsed_block_argument = true;
14244 break;
14245 }
// A `*` splat, either anonymous (followed directly by a closing or
// separating token) or applied to an expression.
14246 case PM_TOKEN_USTAR: {
14247 parser_lex(parser);
14248 pm_token_t operator = parser->previous;
14249
14250 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14251 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14252 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14253 if (parsed_bare_hash) {
14254 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14255 }
14256 } else {
14257 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14258
14259 if (parsed_bare_hash) {
14260 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14261 }
14262
14263 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14264 }
14265
14266 parse_arguments_append(parser, arguments, argument);
14267 break;
14268 }
14269 case PM_TOKEN_UDOT_DOT_DOT: {
14270 if (accepts_forwarding) {
14271 parser_lex(parser);
14272
14273 if (token_begins_expression_p(parser->current.type)) {
14274 // If the token begins an expression then this ... was
14275 // not actually argument forwarding but was instead a
14276 // range.
14277 pm_token_t operator = parser->previous;
14278 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14279
14280 // If we parse a range, we need to validate that we
14281 // didn't accidentally violate the nonassoc rules of the
14282 // ... operator.
14283 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14284 pm_range_node_t *range = (pm_range_node_t *) right;
14285 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14286 }
14287
14288 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14289 } else {
14290 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14291 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14292 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14293 }
14294
14295 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14296 parse_arguments_append(parser, arguments, argument);
14297 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14298 arguments->has_forwarding = true;
14299 parsed_forwarding_arguments = true;
14300 break;
14301 }
14302 }
14303 }
// Intentional fall through into the default case. It is reached either with
// `argument` already set to the range parsed above, or with `argument` still
// NULL because forwarding is not accepted here; the default case handles both.
14305 default: {
14306 if (argument == NULL) {
14307 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14308 }
14309
14310 bool contains_keywords = false;
14311 bool contains_keyword_splat = false;
14312
// If the expression just parsed is a label, or is followed by =>, it is
// the key of the first pair of a bare keyword hash.
14313 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14314 if (parsed_bare_hash) {
14315 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14316 }
14317
14318 pm_token_t operator;
14319 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14320 operator = parser->previous;
14321 } else {
14322 operator = not_provided(parser);
14323 }
14324
14325 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14326 contains_keywords = true;
14327
14328 // Create the set of static literals for this hash.
14329 pm_static_literals_t hash_keys = { 0 };
14330 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14331
14332 // Finish parsing the one we are part way through.
14333 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14334 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14335
14336 pm_keyword_hash_node_elements_append(bare_hash, argument);
14337 argument = (pm_node_t *) bare_hash;
14338
14339 // Then parse more if we have a comma
14340 if (accept1(parser, PM_TOKEN_COMMA) && (
14341 token_begins_expression_p(parser->current.type) ||
14342 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14343 )) {
14344 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14345 }
14346
14347 pm_static_literals_free(&hash_keys);
14348 parsed_bare_hash = true;
14349 }
14350
14351 parse_arguments_append(parser, arguments, argument);
14352
14353 pm_node_flags_t flags = 0;
14354 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14355 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14356 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14357
14358 break;
14359 }
14360 }
14361
14362 parsed_first_argument = true;
14363
14364 // If parsing the argument failed, we need to stop parsing arguments.
14365 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14366
14367 // If the terminator of these arguments is not EOF, then we have a
14368 // specific token we're looking for. In that case we can accept a
14369 // newline here because it is not functioning as a statement terminator.
14370 bool accepted_newline = false;
14371 if (terminator != PM_TOKEN_EOF) {
14372 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14373 }
14374
14375 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14376 // If we previously were on a comma and we just parsed a bare hash,
14377 // then we want to continue parsing arguments. This is because the
14378 // comma was grabbed up by the hash parser.
14379 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14380 // If there was a comma, then we need to check if we also accepted a
14381 // newline. If we did, then this is a syntax error.
14382 if (accepted_newline) {
14383 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14384 }
14385 } else {
14386 // If there is no comma at the end of the argument list then we're
14387 // done parsing arguments and can break out of this loop.
14388 break;
14389 }
14390
14391 // If we hit the terminator, then that means we have a trailing comma so
14392 // we can accept that output as well.
14393 if (match1(parser, terminator)) break;
14394 }
14395}
14396
14408parse_required_destructured_parameter(pm_parser_t *parser) {
14409 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14410
14411 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14412 pm_multi_target_node_opening_set(node, &parser->previous);
14413
14414 do {
14415 pm_node_t *param;
14416
14417 // If we get here then we have a trailing comma, which isn't allowed in
14418 // the grammar. In other places, multi targets _do_ allow trailing
14419 // commas, so here we'll assume this is a mistake of the user not
14420 // knowing it's not allowed here.
14421 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14422 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14423 pm_multi_target_node_targets_append(parser, node, param);
14424 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14425 break;
14426 }
14427
14428 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14429 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14430 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14431 pm_token_t star = parser->previous;
14432 pm_node_t *value = NULL;
14433
14434 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14435 pm_token_t name = parser->previous;
14436 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14437 if (pm_parser_parameter_name_check(parser, &name)) {
14438 pm_node_flag_set_repeated_parameter(value);
14439 }
14440 pm_parser_local_add_token(parser, &name, 1);
14441 }
14442
14443 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14444 } else {
14445 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14446 pm_token_t name = parser->previous;
14447
14448 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14449 if (pm_parser_parameter_name_check(parser, &name)) {
14450 pm_node_flag_set_repeated_parameter(param);
14451 }
14452 pm_parser_local_add_token(parser, &name, 1);
14453 }
14454
14455 pm_multi_target_node_targets_append(parser, node, param);
14456 } while (accept1(parser, PM_TOKEN_COMMA));
14457
14458 accept1(parser, PM_TOKEN_NEWLINE);
14459 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14460 pm_multi_target_node_closing_set(node, &parser->previous);
14461
14462 return node;
14463}
14464
/**
 * The states used to validate the order of parameters in a parameter list.
 * The numeric values are significant: apart from PM_PARAMETERS_NO_CHANGE, the
 * states are compared with relational operators, so their relative order must
 * be preserved.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14480
14484static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14485 [0] = PM_PARAMETERS_NO_CHANGE,
14486 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14487 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14488 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14489 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14490 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14491 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14492 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14493 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14494 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14495 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14496 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14497};
14498
14506static bool
14507update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14508 pm_parameters_order_t state = parameters_ordering[token->type];
14509 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14510
14511 // If we see another ordered argument after a optional argument
14512 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14513 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14514 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14515 return true;
14516 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14517 return true;
14518 }
14519
14520 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14521 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14522 return false;
14523 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14524 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14525 return false;
14526 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14527 // We know what transition we failed on, so we can provide a better error here.
14528 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14529 return false;
14530 }
14531
14532 if (state < *current) *current = state;
14533 return true;
14534}
14535
14539static pm_parameters_node_t *
14540parse_parameters(
14541 pm_parser_t *parser,
14542 pm_binding_power_t binding_power,
14543 bool uses_parentheses,
14544 bool allows_trailing_comma,
14545 bool allows_forwarding_parameters,
14546 bool accepts_blocks_in_defaults,
14547 bool in_block,
14548 uint16_t depth
14549) {
14550 pm_do_loop_stack_push(parser, false);
14551
14552 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14553 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14554
14555 while (true) {
14556 bool parsing = true;
14557
14558 switch (parser->current.type) {
14559 case PM_TOKEN_PARENTHESIS_LEFT: {
14560 update_parameter_state(parser, &parser->current, &order);
14561 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14562
14563 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14564 pm_parameters_node_requireds_append(params, param);
14565 } else {
14566 pm_parameters_node_posts_append(params, param);
14567 }
14568 break;
14569 }
14570 case PM_TOKEN_UAMPERSAND:
14571 case PM_TOKEN_AMPERSAND: {
14572 update_parameter_state(parser, &parser->current, &order);
14573 parser_lex(parser);
14574
14575 pm_token_t operator = parser->previous;
14576 pm_token_t name;
14577
14578 bool repeated = false;
14579 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14580 name = parser->previous;
14581 repeated = pm_parser_parameter_name_check(parser, &name);
14582 pm_parser_local_add_token(parser, &name, 1);
14583 } else {
14584 name = not_provided(parser);
14585 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14586 }
14587
14588 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14589 if (repeated) {
14590 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14591 }
14592 if (params->block == NULL) {
14593 pm_parameters_node_block_set(params, param);
14594 } else {
14595 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14596 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14597 }
14598
14599 break;
14600 }
14601 case PM_TOKEN_UDOT_DOT_DOT: {
14602 if (!allows_forwarding_parameters) {
14603 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14604 }
14605
14606 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14607 parser_lex(parser);
14608
14609 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14610 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14611
14612 if (params->keyword_rest != NULL) {
14613 // If we already have a keyword rest parameter, then we replace it with the
14614 // forwarding parameter and move the keyword rest parameter to the posts list.
14615 pm_node_t *keyword_rest = params->keyword_rest;
14616 pm_parameters_node_posts_append(params, keyword_rest);
14617 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14618 params->keyword_rest = NULL;
14619 }
14620
14621 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14622 break;
14623 }
14624 case PM_TOKEN_CLASS_VARIABLE:
14625 case PM_TOKEN_IDENTIFIER:
14626 case PM_TOKEN_CONSTANT:
14627 case PM_TOKEN_INSTANCE_VARIABLE:
14628 case PM_TOKEN_GLOBAL_VARIABLE:
14629 case PM_TOKEN_METHOD_NAME: {
14630 parser_lex(parser);
14631 switch (parser->previous.type) {
14632 case PM_TOKEN_CONSTANT:
14633 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14634 break;
14635 case PM_TOKEN_INSTANCE_VARIABLE:
14636 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14637 break;
14638 case PM_TOKEN_GLOBAL_VARIABLE:
14639 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14640 break;
14641 case PM_TOKEN_CLASS_VARIABLE:
14642 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14643 break;
14644 case PM_TOKEN_METHOD_NAME:
14645 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14646 break;
14647 default: break;
14648 }
14649
14650 if (parser->current.type == PM_TOKEN_EQUAL) {
14651 update_parameter_state(parser, &parser->current, &order);
14652 } else {
14653 update_parameter_state(parser, &parser->previous, &order);
14654 }
14655
14656 pm_token_t name = parser->previous;
14657 bool repeated = pm_parser_parameter_name_check(parser, &name);
14658 pm_parser_local_add_token(parser, &name, 1);
14659
14660 if (match1(parser, PM_TOKEN_EQUAL)) {
14661 pm_token_t operator = parser->current;
14662 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14663 parser_lex(parser);
14664
14665 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14666 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14667
14668 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14669 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14670 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14671
14672 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14673
14674 if (repeated) {
14675 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14676 }
14677 pm_parameters_node_optionals_append(params, param);
14678
14679 // If the value of the parameter increased the number of
14680 // reads of that parameter, then we need to warn that we
14681 // have a circular definition.
14682 if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14683 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14684 }
14685
14686 context_pop(parser);
14687
14688 // If parsing the value of the parameter resulted in error recovery,
14689 // then we can put a missing node in its place and stop parsing the
14690 // parameters entirely now.
14691 if (parser->recovering) {
14692 parsing = false;
14693 break;
14694 }
14695 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14696 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14697 if (repeated) {
14698 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14699 }
14700 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14701 } else {
14702 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14703 if (repeated) {
14704 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14705 }
14706 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14707 }
14708
14709 break;
14710 }
14711 case PM_TOKEN_LABEL: {
14712 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14713 update_parameter_state(parser, &parser->current, &order);
14714
14715 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14716 parser_lex(parser);
14717
14718 pm_token_t name = parser->previous;
14719 pm_token_t local = name;
14720 local.end -= 1;
14721
14722 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14723 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14724 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14725 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14726 }
14727
14728 bool repeated = pm_parser_parameter_name_check(parser, &local);
14729 pm_parser_local_add_token(parser, &local, 1);
14730
14731 switch (parser->current.type) {
14732 case PM_TOKEN_COMMA:
14733 case PM_TOKEN_PARENTHESIS_RIGHT:
14734 case PM_TOKEN_PIPE: {
14735 context_pop(parser);
14736
14737 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14738 if (repeated) {
14739 pm_node_flag_set_repeated_parameter(param);
14740 }
14741
14742 pm_parameters_node_keywords_append(params, param);
14743 break;
14744 }
14745 case PM_TOKEN_SEMICOLON:
14746 case PM_TOKEN_NEWLINE: {
14747 context_pop(parser);
14748
14749 if (uses_parentheses) {
14750 parsing = false;
14751 break;
14752 }
14753
14754 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14755 if (repeated) {
14756 pm_node_flag_set_repeated_parameter(param);
14757 }
14758
14759 pm_parameters_node_keywords_append(params, param);
14760 break;
14761 }
14762 default: {
14763 pm_node_t *param;
14764
14765 if (token_begins_expression_p(parser->current.type)) {
14766 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14767 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14768
14769 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14770 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14771 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14772
14773 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14774 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14775 }
14776
14777 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14778 }
14779 else {
14780 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14781 }
14782
14783 if (repeated) {
14784 pm_node_flag_set_repeated_parameter(param);
14785 }
14786
14787 context_pop(parser);
14788 pm_parameters_node_keywords_append(params, param);
14789
14790 // If parsing the value of the parameter resulted in error recovery,
14791 // then we can put a missing node in its place and stop parsing the
14792 // parameters entirely now.
14793 if (parser->recovering) {
14794 parsing = false;
14795 break;
14796 }
14797 }
14798 }
14799
14800 parser->in_keyword_arg = false;
14801 break;
14802 }
14803 case PM_TOKEN_USTAR:
14804 case PM_TOKEN_STAR: {
14805 update_parameter_state(parser, &parser->current, &order);
14806 parser_lex(parser);
14807
14808 pm_token_t operator = parser->previous;
14809 pm_token_t name;
14810 bool repeated = false;
14811
14812 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14813 name = parser->previous;
14814 repeated = pm_parser_parameter_name_check(parser, &name);
14815 pm_parser_local_add_token(parser, &name, 1);
14816 } else {
14817 name = not_provided(parser);
14818 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14819 }
14820
14821 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14822 if (repeated) {
14823 pm_node_flag_set_repeated_parameter(param);
14824 }
14825
14826 if (params->rest == NULL) {
14827 pm_parameters_node_rest_set(params, param);
14828 } else {
14829 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14830 pm_parameters_node_posts_append(params, param);
14831 }
14832
14833 break;
14834 }
14835 case PM_TOKEN_STAR_STAR:
14836 case PM_TOKEN_USTAR_STAR: {
14837 pm_parameters_order_t previous_order = order;
14838 update_parameter_state(parser, &parser->current, &order);
14839 parser_lex(parser);
14840
14841 pm_token_t operator = parser->previous;
14842 pm_node_t *param;
14843
14844 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14845 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14846 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14847 }
14848
14849 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14850 } else {
14851 pm_token_t name;
14852
14853 bool repeated = false;
14854 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14855 name = parser->previous;
14856 repeated = pm_parser_parameter_name_check(parser, &name);
14857 pm_parser_local_add_token(parser, &name, 1);
14858 } else {
14859 name = not_provided(parser);
14860 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14861 }
14862
14863 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14864 if (repeated) {
14865 pm_node_flag_set_repeated_parameter(param);
14866 }
14867 }
14868
14869 if (params->keyword_rest == NULL) {
14870 pm_parameters_node_keyword_rest_set(params, param);
14871 } else {
14872 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14873 pm_parameters_node_posts_append(params, param);
14874 }
14875
14876 break;
14877 }
14878 default:
14879 if (parser->previous.type == PM_TOKEN_COMMA) {
14880 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14881 // If we get here, then we have a trailing comma in a
14882 // block parameter list.
14883 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14884
14885 if (params->rest == NULL) {
14886 pm_parameters_node_rest_set(params, param);
14887 } else {
14888 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14889 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14890 }
14891 } else {
14892 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14893 }
14894 }
14895
14896 parsing = false;
14897 break;
14898 }
14899
14900 // If we hit some kind of issue while parsing the parameter, this would
14901 // have been set to false. In that case, we need to break out of the
14902 // loop.
14903 if (!parsing) break;
14904
14905 bool accepted_newline = false;
14906 if (uses_parentheses) {
14907 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14908 }
14909
14910 if (accept1(parser, PM_TOKEN_COMMA)) {
14911 // If there was a comma, but we also accepted a newline, then this
14912 // is a syntax error.
14913 if (accepted_newline) {
14914 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14915 }
14916 } else {
14917 // If there was no comma, then we're done parsing parameters.
14918 break;
14919 }
14920 }
14921
14922 pm_do_loop_stack_pop(parser);
14923
14924 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14925 if (params->base.location.start == params->base.location.end) {
14926 pm_node_destroy(parser, (pm_node_t *) params);
14927 return NULL;
14928 }
14929
14930 return params;
14931}
14932
14937static size_t
14938token_newline_index(const pm_parser_t *parser) {
14939 if (parser->heredoc_end == NULL) {
14940 // This is the common case. In this case we can look at the previously
14941 // recorded newline in the newline list and subtract from the current
14942 // offset.
14943 return parser->newline_list.size - 1;
14944 } else {
14945 // This is unlikely. This is the case that we have already parsed the
14946 // start of a heredoc, so we cannot rely on looking at the previous
14947 // offset of the newline list, and instead must go through the whole
14948 // process of a binary search for the line number.
14949 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14950 }
14951}
14952
14957static int64_t
14958token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14959 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14960 const uint8_t *end = token->start;
14961
14962 // Skip over the BOM if it is present.
14963 if (
14964 newline_index == 0 &&
14965 parser->start[0] == 0xef &&
14966 parser->start[1] == 0xbb &&
14967 parser->start[2] == 0xbf
14968 ) cursor += 3;
14969
14970 int64_t column = 0;
14971 for (; cursor < end; cursor++) {
14972 switch (*cursor) {
14973 case '\t':
14974 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14975 break;
14976 case ' ':
14977 column++;
14978 break;
14979 default:
14980 column++;
14981 if (break_on_non_space) return -1;
14982 break;
14983 }
14984 }
14985
14986 return column;
14987}
14988
14993static void
14994parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14995 // If these warnings are disabled (unlikely), then we can just return.
14996 if (!parser->warn_mismatched_indentation) return;
14997
14998 // If the tokens are on the same line, we do not warn.
14999 size_t closing_newline_index = token_newline_index(parser);
15000 if (opening_newline_index == closing_newline_index) return;
15001
15002 // If the opening token has anything other than spaces or tabs before it,
15003 // then we do not warn. This is unless we are matching up an `if`/`end` pair
15004 // and the `if` immediately follows an `else` keyword.
15005 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
15006 if (!if_after_else && (opening_column == -1)) return;
15007
15008 // Get a reference to the closing token off the current parser. This assumes
15009 // that the caller has placed this in the correct position.
15010 pm_token_t *closing_token = &parser->current;
15011
15012 // If the tokens are at the same indentation, we do not warn.
15013 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15014 if ((closing_column == -1) || (opening_column == closing_column)) return;
15015
15016 // If the closing column is greater than the opening column and we are
15017 // allowing indentation, then we do not warn.
15018 if (allow_indent && (closing_column > opening_column)) return;
15019
15020 // Otherwise, add a warning.
15021 PM_PARSER_WARN_FORMAT(
15022 parser,
15023 closing_token->start,
15024 closing_token->end,
15025 PM_WARN_INDENTATION_MISMATCH,
15026 (int) (closing_token->end - closing_token->start),
15027 (const char *) closing_token->start,
15028 (int) (opening_token->end - opening_token->start),
15029 (const char *) opening_token->start,
15030 ((int32_t) opening_newline_index) + parser->start_line
15031 );
15032}
15033
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * Values start at 1, so 0 never names a valid kind.
 */
typedef enum {
    /** Clauses attached to a `begin` statement. */
    PM_RESCUES_BEGIN = 1,

    /** Clauses attached to a block. */
    PM_RESCUES_BLOCK = 2,

    /** Clauses attached to a class body. */
    PM_RESCUES_CLASS = 3,

    /** Clauses attached to a method definition. */
    PM_RESCUES_DEF = 4,

    /** Clauses attached to a lambda. */
    PM_RESCUES_LAMBDA = 5,

    /** Clauses attached to a module body. */
    PM_RESCUES_MODULE = 6,

    /** Clauses attached to a singleton class body. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15043
15048static inline void
15049parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15050 pm_rescue_node_t *current = NULL;
15051
15052 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15053 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15054 parser_lex(parser);
15055
15056 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15057
15058 switch (parser->current.type) {
15059 case PM_TOKEN_EQUAL_GREATER: {
15060 // Here we have an immediate => after the rescue keyword, in which case
15061 // we're going to have an empty list of exceptions to rescue (which
15062 // implies StandardError).
15063 parser_lex(parser);
15064 pm_rescue_node_operator_set(rescue, &parser->previous);
15065
15066 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15067 reference = parse_target(parser, reference, false, false);
15068
15069 pm_rescue_node_reference_set(rescue, reference);
15070 break;
15071 }
15072 case PM_TOKEN_NEWLINE:
15073 case PM_TOKEN_SEMICOLON:
15074 case PM_TOKEN_KEYWORD_THEN:
15075 // Here we have a terminator for the rescue keyword, in which
15076 // case we're going to just continue on.
15077 break;
15078 default: {
15079 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15080 // Here we have something that could be an exception expression, so
15081 // we'll attempt to parse it here and any others delimited by commas.
15082
15083 do {
15084 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15085 pm_rescue_node_exceptions_append(rescue, expression);
15086
15087 // If we hit a newline, then this is the end of the rescue expression. We
15088 // can continue on to parse the statements.
15089 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15090
15091 // If we hit a `=>` then we're going to parse the exception variable. Once
15092 // we've done that, we'll break out of the loop and parse the statements.
15093 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15094 pm_rescue_node_operator_set(rescue, &parser->previous);
15095
15096 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15097 reference = parse_target(parser, reference, false, false);
15098
15099 pm_rescue_node_reference_set(rescue, reference);
15100 break;
15101 }
15102 } while (accept1(parser, PM_TOKEN_COMMA));
15103 }
15104 }
15105 }
15106
15107 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15108 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15109 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15110 }
15111 } else {
15112 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15113 rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
15114 }
15115
15116 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
15117 pm_accepts_block_stack_push(parser, true);
15118 pm_context_t context;
15119
15120 switch (type) {
15121 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15122 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15123 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15124 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15125 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15126 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15127 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15128 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15129 }
15130
15131 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15132 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15133
15134 pm_accepts_block_stack_pop(parser);
15135 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15136 }
15137
15138 if (current == NULL) {
15139 pm_begin_node_rescue_clause_set(parent_node, rescue);
15140 } else {
15141 pm_rescue_node_subsequent_set(current, rescue);
15142 }
15143
15144 current = rescue;
15145 }
15146
15147 // The end node locations on rescue nodes will not be set correctly
15148 // since we won't know the end until we've found all subsequent
15149 // clauses. This sets the end location on all rescues once we know it.
15150 if (current != NULL) {
15151 const uint8_t *end_to_set = current->base.location.end;
15152 pm_rescue_node_t *clause = parent_node->rescue_clause;
15153
15154 while (clause != NULL) {
15155 clause->base.location.end = end_to_set;
15156 clause = clause->subsequent;
15157 }
15158 }
15159
15160 pm_token_t else_keyword;
15161 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15162 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15163 opening_newline_index = token_newline_index(parser);
15164
15165 else_keyword = parser->current;
15166 opening = &else_keyword;
15167
15168 parser_lex(parser);
15169 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15170
15171 pm_statements_node_t *else_statements = NULL;
15172 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15173 pm_accepts_block_stack_push(parser, true);
15174 pm_context_t context;
15175
15176 switch (type) {
15177 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15178 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15179 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15180 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15181 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15182 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15183 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15184 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15185 }
15186
15187 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15188 pm_accepts_block_stack_pop(parser);
15189
15190 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15191 }
15192
15193 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15194 pm_begin_node_else_clause_set(parent_node, else_clause);
15195
15196 // If we don't have a `current` rescue node, then this is a dangling
15197 // else, and it's an error.
15198 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15199 }
15200
15201 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15202 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15203 pm_token_t ensure_keyword = parser->current;
15204
15205 parser_lex(parser);
15206 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15207
15208 pm_statements_node_t *ensure_statements = NULL;
15209 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15210 pm_accepts_block_stack_push(parser, true);
15211 pm_context_t context;
15212
15213 switch (type) {
15214 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15215 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15216 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15217 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15218 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15219 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15220 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15221 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15222 }
15223
15224 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15225 pm_accepts_block_stack_pop(parser);
15226
15227 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15228 }
15229
15230 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15231 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15232 }
15233
15234 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15235 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15236 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15237 } else {
15238 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15239 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15240 }
15241}
15242
15247static pm_begin_node_t *
15248parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15249 pm_token_t begin_keyword = not_provided(parser);
15250 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15251
15252 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15253 node->base.location.start = start;
15254
15255 return node;
15256}
15257
15262parse_block_parameters(
15263 pm_parser_t *parser,
15264 bool allows_trailing_comma,
15265 const pm_token_t *opening,
15266 bool is_lambda_literal,
15267 bool accepts_blocks_in_defaults,
15268 uint16_t depth
15269) {
15270 pm_parameters_node_t *parameters = NULL;
15271 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15272 parameters = parse_parameters(
15273 parser,
15274 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15275 false,
15276 allows_trailing_comma,
15277 false,
15278 accepts_blocks_in_defaults,
15279 true,
15280 (uint16_t) (depth + 1)
15281 );
15282 }
15283
15284 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15285 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15286 accept1(parser, PM_TOKEN_NEWLINE);
15287
15288 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15289 do {
15290 switch (parser->current.type) {
15291 case PM_TOKEN_CONSTANT:
15292 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15293 parser_lex(parser);
15294 break;
15295 case PM_TOKEN_INSTANCE_VARIABLE:
15296 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15297 parser_lex(parser);
15298 break;
15299 case PM_TOKEN_GLOBAL_VARIABLE:
15300 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15301 parser_lex(parser);
15302 break;
15303 case PM_TOKEN_CLASS_VARIABLE:
15304 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15305 parser_lex(parser);
15306 break;
15307 default:
15308 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15309 break;
15310 }
15311
15312 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15313 pm_parser_local_add_token(parser, &parser->previous, 1);
15314
15315 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15316 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15317
15318 pm_block_parameters_node_append_local(block_parameters, local);
15319 } while (accept1(parser, PM_TOKEN_COMMA));
15320 }
15321 }
15322
15323 return block_parameters;
15324}
15325
15330static bool
15331outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15332 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15333 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15334 }
15335
15336 return false;
15337}
15338
/**
 * The names of the numbered parameters, `_1` through `_9`, stored so that the
 * name `_N` is found at index N - 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15347
15353static pm_node_t *
15354parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15355 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15356
15357 // If we have ordinary parameters, then we will return them as the set of
15358 // parameters.
15359 if (parameters != NULL) {
15360 // If we also have implicit parameters, then this is an error.
15361 if (implicit_parameters->size > 0) {
15362 pm_node_t *node = implicit_parameters->nodes[0];
15363
15364 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15365 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15366 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15367 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15368 } else {
15369 assert(false && "unreachable");
15370 }
15371 }
15372
15373 return parameters;
15374 }
15375
15376 // If we don't have any implicit parameters, then the set of parameters is
15377 // NULL.
15378 if (implicit_parameters->size == 0) {
15379 return NULL;
15380 }
15381
15382 // If we don't have ordinary parameters, then we now must validate our set
15383 // of implicit parameters. We can only have numbered parameters or it, but
15384 // they cannot be mixed.
15385 uint8_t numbered_parameter = 0;
15386 bool it_parameter = false;
15387
15388 for (size_t index = 0; index < implicit_parameters->size; index++) {
15389 pm_node_t *node = implicit_parameters->nodes[index];
15390
15391 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15392 if (it_parameter) {
15393 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15394 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15395 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15396 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15397 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15398 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15399 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15400 } else {
15401 assert(false && "unreachable");
15402 }
15403 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15404 if (numbered_parameter > 0) {
15405 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15406 } else {
15407 it_parameter = true;
15408 }
15409 }
15410 }
15411
15412 if (numbered_parameter > 0) {
15413 // Go through the parent scopes and mark them as being disallowed from
15414 // using numbered parameters because this inner scope is using them.
15415 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15416 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15417 }
15418
15419 const pm_location_t location = { .start = opening->start, .end = closing->end };
15420 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15421 }
15422
15423 if (it_parameter) {
15424 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15425 }
15426
15427 return NULL;
15428}
15429
15433static pm_block_node_t *
15434parse_block(pm_parser_t *parser, uint16_t depth) {
15435 pm_token_t opening = parser->previous;
15436 accept1(parser, PM_TOKEN_NEWLINE);
15437
15438 pm_accepts_block_stack_push(parser, true);
15439 pm_parser_scope_push(parser, false);
15440
15441 pm_block_parameters_node_t *block_parameters = NULL;
15442
15443 if (accept1(parser, PM_TOKEN_PIPE)) {
15444 pm_token_t block_parameters_opening = parser->previous;
15445 if (match1(parser, PM_TOKEN_PIPE)) {
15446 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15447 parser->command_start = true;
15448 parser_lex(parser);
15449 } else {
15450 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15451 accept1(parser, PM_TOKEN_NEWLINE);
15452 parser->command_start = true;
15453 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15454 }
15455
15456 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15457 }
15458
15459 accept1(parser, PM_TOKEN_NEWLINE);
15460 pm_node_t *statements = NULL;
15461
15462 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15463 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15464 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15465 }
15466
15467 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15468 } else {
15469 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15470 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15471 pm_accepts_block_stack_push(parser, true);
15472 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15473 pm_accepts_block_stack_pop(parser);
15474 }
15475
15476 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15477 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15478 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15479 }
15480 }
15481
15482 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15483 }
15484
15485 pm_constant_id_list_t locals;
15486 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15487 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15488
15489 pm_parser_scope_pop(parser);
15490 pm_accepts_block_stack_pop(parser);
15491
15492 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15493}
15494
15500static bool
15501parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15502 bool found = false;
15503
15504 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15505 found |= true;
15506 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15507
15508 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15509 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15510 } else {
15511 pm_accepts_block_stack_push(parser, true);
15512 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15513
15514 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15515 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15516 parser->previous.start = parser->previous.end;
15517 parser->previous.type = PM_TOKEN_MISSING;
15518 }
15519
15520 pm_accepts_block_stack_pop(parser);
15521 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15522 }
15523 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15524 found |= true;
15525 pm_accepts_block_stack_push(parser, false);
15526
15527 // If we get here, then the subsequent token cannot be used as an infix
15528 // operator. In this case we assume the subsequent token is part of an
15529 // argument to this method call.
15530 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15531
15532 // If we have done with the arguments and still not consumed the comma,
15533 // then we have a trailing comma where we need to check whether it is
15534 // allowed or not.
15535 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15536 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15537 }
15538
15539 pm_accepts_block_stack_pop(parser);
15540 }
15541
15542 // If we're at the end of the arguments, we can now check if there is a block
15543 // node that starts with a {. If there is, then we can parse it and add it to
15544 // the arguments.
15545 if (accepts_block) {
15546 pm_block_node_t *block = NULL;
15547
15548 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15549 found |= true;
15550 block = parse_block(parser, (uint16_t) (depth + 1));
15551 pm_arguments_validate_block(parser, arguments, block);
15552 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15553 found |= true;
15554 block = parse_block(parser, (uint16_t) (depth + 1));
15555 }
15556
15557 if (block != NULL) {
15558 if (arguments->block == NULL && !arguments->has_forwarding) {
15559 arguments->block = (pm_node_t *) block;
15560 } else {
15561 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15562
15563 if (arguments->block != NULL) {
15564 if (arguments->arguments == NULL) {
15565 arguments->arguments = pm_arguments_node_create(parser);
15566 }
15567 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15568 }
15569 arguments->block = (pm_node_t *) block;
15570 }
15571 }
15572 }
15573
15574 return found;
15575}
15576
15581static void
15582parse_return(pm_parser_t *parser, pm_node_t *node) {
15583 bool in_sclass = false;
15584 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15585 switch (context_node->context) {
15589 case PM_CONTEXT_BEGIN:
15590 case PM_CONTEXT_CASE_IN:
15593 case PM_CONTEXT_DEFINED:
15594 case PM_CONTEXT_ELSE:
15595 case PM_CONTEXT_ELSIF:
15596 case PM_CONTEXT_EMBEXPR:
15598 case PM_CONTEXT_FOR:
15599 case PM_CONTEXT_IF:
15601 case PM_CONTEXT_MAIN:
15603 case PM_CONTEXT_PARENS:
15604 case PM_CONTEXT_POSTEXE:
15606 case PM_CONTEXT_PREEXE:
15608 case PM_CONTEXT_TERNARY:
15609 case PM_CONTEXT_UNLESS:
15610 case PM_CONTEXT_UNTIL:
15611 case PM_CONTEXT_WHILE:
15612 // Keep iterating up the lists of contexts, because returns can
15613 // see through these.
15614 continue;
15618 case PM_CONTEXT_SCLASS:
15619 in_sclass = true;
15620 continue;
15624 case PM_CONTEXT_CLASS:
15628 case PM_CONTEXT_MODULE:
15629 // These contexts are invalid for a return.
15630 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15631 return;
15641 case PM_CONTEXT_DEF:
15647 // These contexts are valid for a return, and we should not
15648 // continue to loop.
15649 return;
15650 case PM_CONTEXT_NONE:
15651 // This case should never happen.
15652 assert(false && "unreachable");
15653 break;
15654 }
15655 }
15656 if (in_sclass) {
15657 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15658 }
15659}
15660
15665static void
15666parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15667 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15668 switch (context_node->context) {
15674 case PM_CONTEXT_DEFINED:
15675 case PM_CONTEXT_FOR:
15682 case PM_CONTEXT_POSTEXE:
15683 case PM_CONTEXT_UNTIL:
15684 case PM_CONTEXT_WHILE:
15685 // These are the good cases. We're allowed to have a block exit
15686 // in these contexts.
15687 return;
15688 case PM_CONTEXT_DEF:
15693 case PM_CONTEXT_MAIN:
15694 case PM_CONTEXT_PREEXE:
15695 case PM_CONTEXT_SCLASS:
15699 // These are the bad cases. We're not allowed to have a block
15700 // exit in these contexts.
15701 //
15702 // If we get here, then we're about to mark this block exit
15703 // as invalid. However, it could later _become_ valid if we
15704 // find a trailing while/until on the expression. In this
15705 // case instead of adding the error here, we'll add the
15706 // block exit to the list of exits for the expression, and
15707 // the node parsing will handle validating it instead.
15708 assert(parser->current_block_exits != NULL);
15709 pm_node_list_append(parser->current_block_exits, node);
15710 return;
15714 case PM_CONTEXT_BEGIN:
15715 case PM_CONTEXT_CASE_IN:
15720 case PM_CONTEXT_CLASS:
15722 case PM_CONTEXT_ELSE:
15723 case PM_CONTEXT_ELSIF:
15724 case PM_CONTEXT_EMBEXPR:
15726 case PM_CONTEXT_IF:
15730 case PM_CONTEXT_MODULE:
15732 case PM_CONTEXT_PARENS:
15735 case PM_CONTEXT_TERNARY:
15736 case PM_CONTEXT_UNLESS:
15737 // In these contexts we should continue walking up the list of
15738 // contexts.
15739 break;
15740 case PM_CONTEXT_NONE:
15741 // This case should never happen.
15742 assert(false && "unreachable");
15743 break;
15744 }
15745 }
15746}
15747
15752static pm_node_list_t *
15753push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15754 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15755 parser->current_block_exits = current_block_exits;
15756 return previous_block_exits;
15757}
15758
15764static void
15765flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15766 pm_node_t *block_exit;
15767 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15768 const char *type;
15769
15770 switch (PM_NODE_TYPE(block_exit)) {
15771 case PM_BREAK_NODE: type = "break"; break;
15772 case PM_NEXT_NODE: type = "next"; break;
15773 case PM_REDO_NODE: type = "redo"; break;
15774 default: assert(false && "unreachable"); type = ""; break;
15775 }
15776
15777 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15778 }
15779
15780 parser->current_block_exits = previous_block_exits;
15781}
15782
15787static void
15788pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15789 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15790 // If we matched a trailing while/until, then all of the block exits in
15791 // the contained list are valid. In this case we do not need to do
15792 // anything.
15793 parser->current_block_exits = previous_block_exits;
15794 } else if (previous_block_exits != NULL) {
15795 // If we did not matching a trailing while/until, then all of the block
15796 // exits contained in the list are invalid for this specific context.
15797 // However, they could still become valid in a higher level context if
15798 // there is another list above this one. In this case we'll push all of
15799 // the block exits up to the previous list.
15800 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15801 parser->current_block_exits = previous_block_exits;
15802 } else {
15803 // If we did not match a trailing while/until and this was the last
15804 // chance to do so, then all of the block exits in the list are invalid
15805 // and we need to add an error for each of them.
15806 flush_block_exits(parser, previous_block_exits);
15807 }
15808}
15809
15810static inline pm_node_t *
15811parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15812 context_push(parser, PM_CONTEXT_PREDICATE);
15813 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15814 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15815
15816 // Predicates are closed by a term, a "then", or a term and then a "then".
15817 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15818
15819 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15820 predicate_closed = true;
15821 *then_keyword = parser->previous;
15822 }
15823
15824 if (!predicate_closed) {
15825 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15826 }
15827
15828 context_pop(parser);
15829 return predicate;
15830}
15831
15832static inline pm_node_t *
15833parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15834 pm_node_list_t current_block_exits = { 0 };
15835 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15836
15837 pm_token_t keyword = parser->previous;
15838 pm_token_t then_keyword = not_provided(parser);
15839
15840 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15841 pm_statements_node_t *statements = NULL;
15842
15843 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15844 pm_accepts_block_stack_push(parser, true);
15845 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15846 pm_accepts_block_stack_pop(parser);
15847 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15848 }
15849
15850 pm_token_t end_keyword = not_provided(parser);
15851 pm_node_t *parent = NULL;
15852
15853 switch (context) {
15854 case PM_CONTEXT_IF:
15855 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15856 break;
15857 case PM_CONTEXT_UNLESS:
15858 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15859 break;
15860 default:
15861 assert(false && "unreachable");
15862 break;
15863 }
15864
15865 pm_node_t *current = parent;
15866
15867 // Parse any number of elsif clauses. This will form a linked list of if
15868 // nodes pointing to each other from the top.
15869 if (context == PM_CONTEXT_IF) {
15870 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15871 if (parser_end_of_line_p(parser)) {
15872 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15873 }
15874
15875 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15876 pm_token_t elsif_keyword = parser->current;
15877 parser_lex(parser);
15878
15879 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15880 pm_accepts_block_stack_push(parser, true);
15881
15882 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15883 pm_accepts_block_stack_pop(parser);
15884 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15885
15886 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15887 ((pm_if_node_t *) current)->subsequent = elsif;
15888 current = elsif;
15889 }
15890 }
15891
15892 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15893 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15894 opening_newline_index = token_newline_index(parser);
15895
15896 parser_lex(parser);
15897 pm_token_t else_keyword = parser->previous;
15898
15899 pm_accepts_block_stack_push(parser, true);
15900 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15901 pm_accepts_block_stack_pop(parser);
15902
15903 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15904 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15905 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15906
15907 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15908
15909 switch (context) {
15910 case PM_CONTEXT_IF:
15911 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15912 break;
15913 case PM_CONTEXT_UNLESS:
15914 ((pm_unless_node_t *) parent)->else_clause = else_node;
15915 break;
15916 default:
15917 assert(false && "unreachable");
15918 break;
15919 }
15920 } else {
15921 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15922 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15923 }
15924
15925 // Set the appropriate end location for all of the nodes in the subtree.
15926 switch (context) {
15927 case PM_CONTEXT_IF: {
15928 pm_node_t *current = parent;
15929 bool recursing = true;
15930
15931 while (recursing) {
15932 switch (PM_NODE_TYPE(current)) {
15933 case PM_IF_NODE:
15934 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15935 current = ((pm_if_node_t *) current)->subsequent;
15936 recursing = current != NULL;
15937 break;
15938 case PM_ELSE_NODE:
15939 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15940 recursing = false;
15941 break;
15942 default: {
15943 recursing = false;
15944 break;
15945 }
15946 }
15947 }
15948 break;
15949 }
15950 case PM_CONTEXT_UNLESS:
15951 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15952 break;
15953 default:
15954 assert(false && "unreachable");
15955 break;
15956 }
15957
15958 pop_block_exits(parser, previous_block_exits);
15959 pm_node_list_free(&current_block_exits);
15960
15961 return parent;
15962}
15963
/**
 * A list of case labels covering the keyword tokens. The expansion omits the
 * leading `case` and the trailing colon, so it is meant to be written as
 * `case PM_CASE_KEYWORD:` inside a switch over a token type.
 */
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15979
/**
 * A list of case labels covering the operator tokens. In this file it is used
 * to recognise operators in places where a method name is accepted, such as
 * symbol literals and the arguments to undef and alias. The expansion omits
 * the leading `case` and the trailing colon, so it is meant to be written as
 * `case PM_CASE_OPERATOR:` inside a switch over a token type.
 */
#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15992
/**
 * A list of case labels covering numeric literals, the opening tokens of
 * symbol/regexp/xstring/list/string/heredoc literals, character literals, the
 * lambda arrow, and the literal-like keywords (nil, self, true, false,
 * __FILE__, __LINE__, __ENCODING__). The expansion omits the leading `case`
 * and the trailing colon, so it is meant to be written as
 * `case PM_CASE_PRIMITIVE:` inside a switch over a token type.
 */
#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
16007
/**
 * A list of case labels covering the ampersand, star, double-star and
 * triple-dot tokens, identifiers, labels, constants, and instance/global/class
 * variables. Judging by the name, these are the tokens that can begin a
 * parameter. The expansion omits the leading `case` and the trailing colon, so
 * it is meant to be written as `case PM_CASE_PARAMETER:` inside a switch over
 * a token type.
 */
#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: case PM_TOKEN_CLASS_VARIABLE
16016
/**
 * A list of case labels covering the variable/constant read node types, the
 * constant path node, and the multi target node. Judging by the name, these
 * are the node types that can appear on the left-hand side of a write. Unlike
 * the token macros nearby, the labels here are node types. The expansion omits
 * the leading `case` and the trailing colon, so it is meant to be written as
 * `case PM_CASE_WRITABLE:` inside a switch over a node type.
 */
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
16025
16026// Assert here that the flags are the same so that we can safely switch the type
16027// of the node without having to move the flags.
16028PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16029
16034static inline pm_node_flags_t
16035parse_unescaped_encoding(const pm_parser_t *parser) {
16036 if (parser->explicit_encoding != NULL) {
16038 // If the there's an explicit encoding and it's using a UTF-8 escape
16039 // sequence, then mark the string as UTF-8.
16040 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
16041 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16042 // If there's a non-UTF-8 escape sequence being used, then the
16043 // string uses the source encoding, unless the source is marked as
16044 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16045 // order to keep the string valid.
16046 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
16047 }
16048 }
16049 return 0;
16050}
16051
16056static pm_node_t *
16057parse_string_part(pm_parser_t *parser, uint16_t depth) {
16058 switch (parser->current.type) {
16059 // Here the lexer has returned to us plain string content. In this case
16060 // we'll create a string node that has no opening or closing and return that
16061 // as the part. These kinds of parts look like:
16062 //
16063 // "aaa #{bbb} #@ccc ddd"
16064 // ^^^^ ^ ^^^^
16065 case PM_TOKEN_STRING_CONTENT: {
16066 pm_token_t opening = not_provided(parser);
16067 pm_token_t closing = not_provided(parser);
16068
16069 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16070 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16071
16072 parser_lex(parser);
16073 return node;
16074 }
16075 // Here the lexer has returned the beginning of an embedded expression. In
16076 // that case we'll parse the inner statements and return that as the part.
16077 // These kinds of parts look like:
16078 //
16079 // "aaa #{bbb} #@ccc ddd"
16080 // ^^^^^^
16081 case PM_TOKEN_EMBEXPR_BEGIN: {
16082 // Ruby disallows seeing encoding around interpolation in strings,
16083 // even though it is known at parse time.
16084 parser->explicit_encoding = NULL;
16085
16086 pm_lex_state_t state = parser->lex_state;
16087 int brace_nesting = parser->brace_nesting;
16088
16089 parser->brace_nesting = 0;
16090 lex_state_set(parser, PM_LEX_STATE_BEG);
16091 parser_lex(parser);
16092
16093 pm_token_t opening = parser->previous;
16094 pm_statements_node_t *statements = NULL;
16095
16096 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16097 pm_accepts_block_stack_push(parser, true);
16098 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16099 pm_accepts_block_stack_pop(parser);
16100 }
16101
16102 parser->brace_nesting = brace_nesting;
16103 lex_state_set(parser, state);
16104
16105 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16106 pm_token_t closing = parser->previous;
16107
16108 // If this set of embedded statements only contains a single
16109 // statement, then Ruby does not consider it as a possible statement
16110 // that could emit a line event.
16111 if (statements != NULL && statements->body.size == 1) {
16112 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16113 }
16114
16115 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16116 }
16117
16118 // Here the lexer has returned the beginning of an embedded variable.
16119 // In that case we'll parse the variable and create an appropriate node
16120 // for it and then return that node. These kinds of parts look like:
16121 //
16122 // "aaa #{bbb} #@ccc ddd"
16123 // ^^^^^
16124 case PM_TOKEN_EMBVAR: {
16125 // Ruby disallows seeing encoding around interpolation in strings,
16126 // even though it is known at parse time.
16127 parser->explicit_encoding = NULL;
16128
16129 lex_state_set(parser, PM_LEX_STATE_BEG);
16130 parser_lex(parser);
16131
16132 pm_token_t operator = parser->previous;
16133 pm_node_t *variable;
16134
16135 switch (parser->current.type) {
16136 // In this case a back reference is being interpolated. We'll
16137 // create a global variable read node.
16138 case PM_TOKEN_BACK_REFERENCE:
16139 parser_lex(parser);
16140 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16141 break;
16142 // In this case an nth reference is being interpolated. We'll
16143 // create a global variable read node.
16144 case PM_TOKEN_NUMBERED_REFERENCE:
16145 parser_lex(parser);
16146 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16147 break;
16148 // In this case a global variable is being interpolated. We'll
16149 // create a global variable read node.
16150 case PM_TOKEN_GLOBAL_VARIABLE:
16151 parser_lex(parser);
16152 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16153 break;
16154 // In this case an instance variable is being interpolated.
16155 // We'll create an instance variable read node.
16156 case PM_TOKEN_INSTANCE_VARIABLE:
16157 parser_lex(parser);
16158 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16159 break;
16160 // In this case a class variable is being interpolated. We'll
16161 // create a class variable read node.
16162 case PM_TOKEN_CLASS_VARIABLE:
16163 parser_lex(parser);
16164 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16165 break;
16166 // We can hit here if we got an invalid token. In that case
16167 // we'll not attempt to lex this token and instead just return a
16168 // missing node.
16169 default:
16170 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16171 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16172 break;
16173 }
16174
16175 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16176 }
16177 default:
16178 parser_lex(parser);
16179 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16180 return NULL;
16181 }
16182}
16183
16189static const uint8_t *
16190parse_operator_symbol_name(const pm_token_t *name) {
16191 switch (name->type) {
16192 case PM_TOKEN_TILDE:
16193 case PM_TOKEN_BANG:
16194 if (name->end[-1] == '@') return name->end - 1;
16196 default:
16197 return name->end;
16198 }
16199}
16200
16201static pm_node_t *
16202parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16203 pm_token_t closing = not_provided(parser);
16204 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16205
16206 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16207
16208 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16209 parser_lex(parser);
16210
16211 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16212 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16213
16214 return (pm_node_t *) symbol;
16215}
16216
16222static pm_node_t *
16223parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16224 const pm_token_t opening = parser->previous;
16225
16226 if (lex_mode->mode != PM_LEX_STRING) {
16227 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16228
16229 switch (parser->current.type) {
16230 case PM_CASE_OPERATOR:
16231 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16232 case PM_TOKEN_IDENTIFIER:
16233 case PM_TOKEN_CONSTANT:
16234 case PM_TOKEN_INSTANCE_VARIABLE:
16235 case PM_TOKEN_METHOD_NAME:
16236 case PM_TOKEN_CLASS_VARIABLE:
16237 case PM_TOKEN_GLOBAL_VARIABLE:
16238 case PM_TOKEN_NUMBERED_REFERENCE:
16239 case PM_TOKEN_BACK_REFERENCE:
16240 case PM_CASE_KEYWORD:
16241 parser_lex(parser);
16242 break;
16243 default:
16244 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16245 break;
16246 }
16247
16248 pm_token_t closing = not_provided(parser);
16249 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16250
16251 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16252 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16253
16254 return (pm_node_t *) symbol;
16255 }
16256
16257 if (lex_mode->as.string.interpolation) {
16258 // If we have the end of the symbol, then we can return an empty symbol.
16259 if (match1(parser, PM_TOKEN_STRING_END)) {
16260 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16261 parser_lex(parser);
16262
16263 pm_token_t content = not_provided(parser);
16264 pm_token_t closing = parser->previous;
16265 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16266 }
16267
16268 // Now we can parse the first part of the symbol.
16269 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16270
16271 // If we got a string part, then it's possible that we could transform
16272 // what looks like an interpolated symbol into a regular symbol.
16273 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16274 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16275 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16276
16277 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16278 }
16279
16280 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16281 if (part) pm_interpolated_symbol_node_append(symbol, part);
16282
16283 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16284 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16285 pm_interpolated_symbol_node_append(symbol, part);
16286 }
16287 }
16288
16289 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16290 if (match1(parser, PM_TOKEN_EOF)) {
16291 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16292 } else {
16293 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16294 }
16295
16296 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16297 return (pm_node_t *) symbol;
16298 }
16299
16300 pm_token_t content;
16301 pm_string_t unescaped;
16302
16303 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16304 content = parser->current;
16305 unescaped = parser->current_string;
16306 parser_lex(parser);
16307
16308 // If we have two string contents in a row, then the content of this
16309 // symbol is split because of heredoc contents. This looks like:
16310 //
16311 // <<A; :'a
16312 // A
16313 // b'
16314 //
16315 // In this case, the best way we have to represent this is as an
16316 // interpolated string node, so that's what we'll do here.
16317 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16318 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16319 pm_token_t bounds = not_provided(parser);
16320
16321 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16322 pm_interpolated_symbol_node_append(symbol, part);
16323
16324 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16325 pm_interpolated_symbol_node_append(symbol, part);
16326
16327 if (next_state != PM_LEX_STATE_NONE) {
16328 lex_state_set(parser, next_state);
16329 }
16330
16331 parser_lex(parser);
16332 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16333
16334 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16335 return (pm_node_t *) symbol;
16336 }
16337 } else {
16338 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16339 pm_string_shared_init(&unescaped, content.start, content.end);
16340 }
16341
16342 if (next_state != PM_LEX_STATE_NONE) {
16343 lex_state_set(parser, next_state);
16344 }
16345
16346 if (match1(parser, PM_TOKEN_EOF)) {
16347 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16348 } else {
16349 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16350 }
16351
16352 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16353}
16354
16359static inline pm_node_t *
16360parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16361 switch (parser->current.type) {
16362 case PM_CASE_OPERATOR: {
16363 const pm_token_t opening = not_provided(parser);
16364 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16365 }
16366 case PM_CASE_KEYWORD:
16367 case PM_TOKEN_CONSTANT:
16368 case PM_TOKEN_IDENTIFIER:
16369 case PM_TOKEN_METHOD_NAME: {
16370 parser_lex(parser);
16371
16372 pm_token_t opening = not_provided(parser);
16373 pm_token_t closing = not_provided(parser);
16374 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16375
16376 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16377 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16378
16379 return (pm_node_t *) symbol;
16380 }
16381 case PM_TOKEN_SYMBOL_BEGIN: {
16382 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16383 parser_lex(parser);
16384
16385 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16386 }
16387 default:
16388 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16389 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16390 }
16391}
16392
16399static inline pm_node_t *
16400parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16401 switch (parser->current.type) {
16402 case PM_CASE_OPERATOR: {
16403 const pm_token_t opening = not_provided(parser);
16404 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16405 }
16406 case PM_CASE_KEYWORD:
16407 case PM_TOKEN_CONSTANT:
16408 case PM_TOKEN_IDENTIFIER:
16409 case PM_TOKEN_METHOD_NAME: {
16410 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16411 parser_lex(parser);
16412
16413 pm_token_t opening = not_provided(parser);
16414 pm_token_t closing = not_provided(parser);
16415 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16416
16417 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16418 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16419
16420 return (pm_node_t *) symbol;
16421 }
16422 case PM_TOKEN_SYMBOL_BEGIN: {
16423 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16424 parser_lex(parser);
16425
16426 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16427 }
16428 case PM_TOKEN_BACK_REFERENCE:
16429 parser_lex(parser);
16430 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16431 case PM_TOKEN_NUMBERED_REFERENCE:
16432 parser_lex(parser);
16433 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16434 case PM_TOKEN_GLOBAL_VARIABLE:
16435 parser_lex(parser);
16436 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16437 default:
16438 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16439 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16440 }
16441}
16442
16447static pm_node_t *
16448parse_variable(pm_parser_t *parser) {
16449 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16450 int depth;
16451 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16452
16453 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16454 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16455 }
16456
16457 pm_scope_t *current_scope = parser->current_scope;
16458 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16459 if (is_numbered_param) {
16460 // When you use a numbered parameter, it implies the existence of
16461 // all of the locals that exist before it. For example, referencing
16462 // _2 means that _1 must exist. Therefore here we loop through all
16463 // of the possibilities and add them into the constant pool.
16464 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16465 for (uint8_t number = 1; number <= maximum; number++) {
16466 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16467 }
16468
16469 if (!match1(parser, PM_TOKEN_EQUAL)) {
16470 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16471 }
16472
16473 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16474 pm_node_list_append(&current_scope->implicit_parameters, node);
16475
16476 return node;
16477 } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16478 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16479 pm_node_list_append(&current_scope->implicit_parameters, node);
16480
16481 return node;
16482 }
16483 }
16484
16485 return NULL;
16486}
16487
16491static pm_node_t *
16492parse_variable_call(pm_parser_t *parser) {
16493 pm_node_flags_t flags = 0;
16494
16495 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16496 pm_node_t *node = parse_variable(parser);
16497 if (node != NULL) return node;
16498 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16499 }
16500
16501 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16502 pm_node_flag_set((pm_node_t *)node, flags);
16503
16504 return (pm_node_t *) node;
16505}
16506
16512static inline pm_token_t
16513parse_method_definition_name(pm_parser_t *parser) {
16514 switch (parser->current.type) {
16515 case PM_CASE_KEYWORD:
16516 case PM_TOKEN_CONSTANT:
16517 case PM_TOKEN_METHOD_NAME:
16518 parser_lex(parser);
16519 return parser->previous;
16520 case PM_TOKEN_IDENTIFIER:
16521 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16522 parser_lex(parser);
16523 return parser->previous;
16524 case PM_CASE_OPERATOR:
16525 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16526 parser_lex(parser);
16527 return parser->previous;
16528 default:
16529 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16530 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16531 }
16532}
16533
16534static void
16535parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16536 // Get a reference to the string struct that is being held by the string
16537 // node. This is the value we're going to actually manipulate.
16538 pm_string_ensure_owned(string);
16539
16540 // Now get the bounds of the existing string. We'll use this as a
16541 // destination to move bytes into. We'll also use it for bounds checking
16542 // since we don't require that these strings be null terminated.
16543 size_t dest_length = pm_string_length(string);
16544 const uint8_t *source_cursor = (uint8_t *) string->source;
16545 const uint8_t *source_end = source_cursor + dest_length;
16546
16547 // We're going to move bytes backward in the string when we get leading
16548 // whitespace, so we'll maintain a pointer to the current position in the
16549 // string that we're writing to.
16550 size_t trimmed_whitespace = 0;
16551
16552 // While we haven't reached the amount of common whitespace that we need to
16553 // trim and we haven't reached the end of the string, we'll keep trimming
16554 // whitespace. Trimming in this context means skipping over these bytes such
16555 // that they aren't copied into the new string.
16556 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16557 if (*source_cursor == '\t') {
16558 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16559 if (trimmed_whitespace > common_whitespace) break;
16560 } else {
16561 trimmed_whitespace++;
16562 }
16563
16564 source_cursor++;
16565 dest_length--;
16566 }
16567
16568 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16569 string->length = dest_length;
16570}
16571
16575static void
16576parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16577 // The next node should be dedented if it's the first node in the list or if
16578 // it follows a string node.
16579 bool dedent_next = true;
16580
16581 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16582 // keep around two indices: a read and a write. If we end up trimming all of
16583 // the whitespace from a node, then we'll drop it from the list entirely.
16584 size_t write_index = 0;
16585
16586 pm_node_t *node;
16587 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16588 // We're not manipulating child nodes that aren't strings. In this case
16589 // we'll skip past it and indicate that the subsequent node should not
16590 // be dedented.
16591 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16592 nodes->nodes[write_index++] = node;
16593 dedent_next = false;
16594 continue;
16595 }
16596
16597 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16598 if (dedent_next) {
16599 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16600 }
16601
16602 if (string_node->unescaped.length == 0) {
16603 pm_node_destroy(parser, node);
16604 } else {
16605 nodes->nodes[write_index++] = node;
16606 }
16607
16608 // We always dedent the next node if it follows a string node.
16609 dedent_next = true;
16610 }
16611
16612 nodes->size = write_index;
16613}
16614
16618static pm_token_t
16619parse_strings_empty_content(const uint8_t *location) {
16620 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16621}
16622
16626static inline pm_node_t *
16627parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16628 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16629 bool concating = false;
16630
16631 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16632 pm_node_t *node = NULL;
16633
16634 // Here we have found a string literal. We'll parse it and add it to
16635 // the list of strings.
16636 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16637 assert(lex_mode->mode == PM_LEX_STRING);
16638 bool lex_interpolation = lex_mode->as.string.interpolation;
16639 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16640
16641 pm_token_t opening = parser->current;
16642 parser_lex(parser);
16643
16644 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16645 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16646 // If we get here, then we have an end immediately after a
16647 // start. In that case we'll create an empty content token and
16648 // return an uninterpolated string.
16649 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16650 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16651
16652 pm_string_shared_init(&string->unescaped, content.start, content.end);
16653 node = (pm_node_t *) string;
16654 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16655 // If we get here, then we have an end of a label immediately
16656 // after a start. In that case we'll create an empty symbol
16657 // node.
16658 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16659 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16660
16661 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16662 node = (pm_node_t *) symbol;
16663
16664 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16665 } else if (!lex_interpolation) {
16666 // If we don't accept interpolation then we expect the string to
16667 // start with a single string content node.
16668 pm_string_t unescaped;
16669 pm_token_t content;
16670
16671 if (match1(parser, PM_TOKEN_EOF)) {
16672 unescaped = PM_STRING_EMPTY;
16673 content = not_provided(parser);
16674 } else {
16675 unescaped = parser->current_string;
16676 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16677 content = parser->previous;
16678 }
16679
16680 // It is unfortunately possible to have multiple string content
16681 // nodes in a row in the case that there's heredoc content in
16682 // the middle of the string, like this cursed example:
16683 //
16684 // <<-END+'b
16685 // a
16686 // END
16687 // c'+'d'
16688 //
16689 // In that case we need to switch to an interpolated string to
16690 // be able to contain all of the parts.
16691 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16692 pm_node_list_t parts = { 0 };
16693
16694 pm_token_t delimiters = not_provided(parser);
16695 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16696 pm_node_list_append(&parts, part);
16697
16698 do {
16699 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16700 pm_node_list_append(&parts, part);
16701 parser_lex(parser);
16702 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16703
16704 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16705 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16706
16707 pm_node_list_free(&parts);
16708 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16709 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16710 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16711 } else if (match1(parser, PM_TOKEN_EOF)) {
16712 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16713 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16714 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16715 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16716 } else {
16717 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16718 parser->previous.start = parser->previous.end;
16719 parser->previous.type = PM_TOKEN_MISSING;
16720 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16721 }
16722 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16723 // In this case we've hit string content so we know the string
16724 // at least has something in it. We'll need to check if the
16725 // following token is the end (in which case we can return a
16726 // plain string) or if it's not then it has interpolation.
16727 pm_token_t content = parser->current;
16728 pm_string_t unescaped = parser->current_string;
16729 parser_lex(parser);
16730
16731 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16732 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16733 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16734
16735 // Kind of odd behavior, but basically if we have an
16736 // unterminated string and it ends in a newline, we back up one
16737 // character so that the error message is on the last line of
16738 // content in the string.
16739 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16740 const uint8_t *location = parser->previous.end;
16741 if (location > parser->start && location[-1] == '\n') location--;
16742 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16743
16744 parser->previous.start = parser->previous.end;
16745 parser->previous.type = PM_TOKEN_MISSING;
16746 }
16747 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16748 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16749 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16750 } else {
16751 // If we get here, then we have interpolation so we'll need
16752 // to create a string or symbol node with interpolation.
16753 pm_node_list_t parts = { 0 };
16754 pm_token_t string_opening = not_provided(parser);
16755 pm_token_t string_closing = not_provided(parser);
16756
16757 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16758 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16759 pm_node_list_append(&parts, part);
16760
16761 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16762 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16763 pm_node_list_append(&parts, part);
16764 }
16765 }
16766
16767 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16768 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16769 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16770 } else if (match1(parser, PM_TOKEN_EOF)) {
16771 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16772 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16773 } else {
16774 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16775 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16776 }
16777
16778 pm_node_list_free(&parts);
16779 }
16780 } else {
16781 // If we get here, then the first part of the string is not plain
16782 // string content, in which case we need to parse the string as an
16783 // interpolated string.
16784 pm_node_list_t parts = { 0 };
16785 pm_node_t *part;
16786
16787 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16788 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16789 pm_node_list_append(&parts, part);
16790 }
16791 }
16792
16793 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16794 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16795 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16796 } else if (match1(parser, PM_TOKEN_EOF)) {
16797 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16798 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16799 } else {
16800 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16801 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16802 }
16803
16804 pm_node_list_free(&parts);
16805 }
16806
16807 if (current == NULL) {
16808 // If the node we just parsed is a symbol node, then we can't
16809 // concatenate it with anything else, so we can now return that
16810 // node.
16811 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16812 return node;
16813 }
16814
16815 // If we don't already have a node, then it's fine and we can just
16816 // set the result to be the node we just parsed.
16817 current = node;
16818 } else {
16819 // Otherwise we need to check the type of the node we just parsed.
16820 // If it cannot be concatenated with the previous node, then we'll
16821 // need to add a syntax error.
16822 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16823 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16824 }
16825
16826 // If we haven't already created our container for concatenation,
16827 // we'll do that now.
16828 if (!concating) {
16829 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16830 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16831 }
16832
16833 concating = true;
16834 pm_token_t bounds = not_provided(parser);
16835
16836 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16837 pm_interpolated_string_node_append(container, current);
16838 current = (pm_node_t *) container;
16839 }
16840
16841 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16842 }
16843 }
16844
16845 return current;
16846}
16847
16848#define PM_PARSE_PATTERN_SINGLE 0
16849#define PM_PARSE_PATTERN_TOP 1
16850#define PM_PARSE_PATTERN_MULTI 2
16851
16852static pm_node_t *
16853parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16854
16860static void
16861parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16862 // Skip this capture if it starts with an underscore.
16863 if (*location->start == '_') return;
16864
16865 if (pm_constant_id_list_includes(captures, capture)) {
16866 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16867 } else {
16868 pm_constant_id_list_append(captures, capture);
16869 }
16870}
16871
16875static pm_node_t *
16876parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16877 // Now, if there are any :: operators that follow, parse them as constant
16878 // path nodes.
16879 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16880 pm_token_t delimiter = parser->previous;
16881 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16882 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16883 }
16884
16885 // If there is a [ or ( that follows, then this is part of a larger pattern
16886 // expression. We'll parse the inner pattern here, then modify the returned
16887 // inner pattern with our constant path attached.
16888 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16889 return node;
16890 }
16891
16892 pm_token_t opening;
16893 pm_token_t closing;
16894 pm_node_t *inner = NULL;
16895
16896 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16897 opening = parser->previous;
16898 accept1(parser, PM_TOKEN_NEWLINE);
16899
16900 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16901 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16902 accept1(parser, PM_TOKEN_NEWLINE);
16903 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16904 }
16905
16906 closing = parser->previous;
16907 } else {
16908 parser_lex(parser);
16909 opening = parser->previous;
16910 accept1(parser, PM_TOKEN_NEWLINE);
16911
16912 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16913 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16914 accept1(parser, PM_TOKEN_NEWLINE);
16915 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16916 }
16917
16918 closing = parser->previous;
16919 }
16920
16921 if (!inner) {
16922 // If there was no inner pattern, then we have something like Foo() or
16923 // Foo[]. In that case we'll create an array pattern with no requireds.
16924 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16925 }
16926
16927 // Now that we have the inner pattern, check to see if it's an array, find,
16928 // or hash pattern. If it is, then we'll attach our constant path to it if
16929 // it doesn't already have a constant. If it's not one of those node types
16930 // or it does have a constant, then we'll create an array pattern.
16931 switch (PM_NODE_TYPE(inner)) {
16932 case PM_ARRAY_PATTERN_NODE: {
16933 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16934
16935 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16936 pattern_node->base.location.start = node->location.start;
16937 pattern_node->base.location.end = closing.end;
16938
16939 pattern_node->constant = node;
16940 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16941 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16942
16943 return (pm_node_t *) pattern_node;
16944 }
16945
16946 break;
16947 }
16948 case PM_FIND_PATTERN_NODE: {
16949 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16950
16951 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16952 pattern_node->base.location.start = node->location.start;
16953 pattern_node->base.location.end = closing.end;
16954
16955 pattern_node->constant = node;
16956 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16957 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16958
16959 return (pm_node_t *) pattern_node;
16960 }
16961
16962 break;
16963 }
16964 case PM_HASH_PATTERN_NODE: {
16965 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16966
16967 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16968 pattern_node->base.location.start = node->location.start;
16969 pattern_node->base.location.end = closing.end;
16970
16971 pattern_node->constant = node;
16972 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16973 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16974
16975 return (pm_node_t *) pattern_node;
16976 }
16977
16978 break;
16979 }
16980 default:
16981 break;
16982 }
16983
16984 // If we got here, then we didn't return one of the inner patterns by
16985 // attaching its constant. In this case we'll create an array pattern and
16986 // attach our constant to it.
16987 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16988 pm_array_pattern_node_requireds_append(pattern_node, inner);
16989 return (pm_node_t *) pattern_node;
16990}
16991
16995static pm_splat_node_t *
16996parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16997 assert(parser->previous.type == PM_TOKEN_USTAR);
16998 pm_token_t operator = parser->previous;
16999 pm_node_t *name = NULL;
17000
17001 // Rest patterns don't necessarily have a name associated with them. So we
17002 // will check for that here. If they do, then we'll add it to the local
17003 // table since this pattern will cause it to become a local variable.
17004 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17005 pm_token_t identifier = parser->previous;
17006 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
17007
17008 int depth;
17009 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17010 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
17011 }
17012
17013 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
17014 name = (pm_node_t *) pm_local_variable_target_node_create(
17015 parser,
17016 &PM_LOCATION_TOKEN_VALUE(&identifier),
17017 constant_id,
17018 (uint32_t) (depth == -1 ? 0 : depth)
17019 );
17020 }
17021
17022 // Finally we can return the created node.
17023 return pm_splat_node_create(parser, &operator, name);
17024}
17025
17029static pm_node_t *
17030parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17031 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17032 parser_lex(parser);
17033
17034 pm_token_t operator = parser->previous;
17035 pm_node_t *value = NULL;
17036
17037 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17038 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17039 }
17040
17041 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17042 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17043
17044 int depth;
17045 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17046 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17047 }
17048
17049 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17050 value = (pm_node_t *) pm_local_variable_target_node_create(
17051 parser,
17052 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17053 constant_id,
17054 (uint32_t) (depth == -1 ? 0 : depth)
17055 );
17056 }
17057
17058 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17059}
17060
17065static bool
17066pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17067 ptrdiff_t length = end - start;
17068 if (length == 0) return false;
17069
17070 // First ensure that it starts with a valid identifier starting character.
17071 size_t width = char_is_identifier_start(parser, start, end - start);
17072 if (width == 0) return false;
17073
17074 // Next, ensure that it's not an uppercase character.
17075 if (parser->encoding_changed) {
17076 if (parser->encoding->isupper_char(start, length)) return false;
17077 } else {
17078 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17079 }
17080
17081 // Next, iterate through all of the bytes of the string to ensure that they
17082 // are all valid identifier characters.
17083 const uint8_t *cursor = start + width;
17084 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
17085 return cursor == end;
17086}
17087
17092static pm_node_t *
17093parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17094 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17095
17096 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17097 int depth = -1;
17098
17099 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17100 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17101 } else {
17102 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17103
17104 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17105 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17106 }
17107 }
17108
17109 if (depth == -1) {
17110 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17111 }
17112
17113 parse_pattern_capture(parser, captures, constant_id, value_loc);
17114 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17115 parser,
17116 value_loc,
17117 constant_id,
17118 (uint32_t) (depth == -1 ? 0 : depth)
17119 );
17120
17121 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17122}
17123
17128static void
17129parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17130 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17131 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17132 }
17133}
17134
17139parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17140 pm_node_list_t assocs = { 0 };
17141 pm_static_literals_t keys = { 0 };
17142 pm_node_t *rest = NULL;
17143
17144 switch (PM_NODE_TYPE(first_node)) {
17145 case PM_ASSOC_SPLAT_NODE:
17146 case PM_NO_KEYWORDS_PARAMETER_NODE:
17147 rest = first_node;
17148 break;
17149 case PM_SYMBOL_NODE: {
17150 if (pm_symbol_node_label_p(first_node)) {
17151 parse_pattern_hash_key(parser, &keys, first_node);
17152 pm_node_t *value;
17153
17154 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17155 // Otherwise, we will create an implicit local variable
17156 // target for the value.
17157 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17158 } else {
17159 // Here we have a value for the first assoc in the list, so
17160 // we will parse it now.
17161 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17162 }
17163
17164 pm_token_t operator = not_provided(parser);
17165 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17166
17167 pm_node_list_append(&assocs, assoc);
17168 break;
17169 }
17170 }
17172 default: {
17173 // If we get anything else, then this is an error. For this we'll
17174 // create a missing node for the value and create an assoc node for
17175 // the first node in the list.
17176 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17177 pm_parser_err_node(parser, first_node, diag_id);
17178
17179 pm_token_t operator = not_provided(parser);
17180 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17181 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17182
17183 pm_node_list_append(&assocs, assoc);
17184 break;
17185 }
17186 }
17187
17188 // If there are any other assocs, then we'll parse them now.
17189 while (accept1(parser, PM_TOKEN_COMMA)) {
17190 // Here we need to break to support trailing commas.
17191 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
17192 // Trailing commas are not allowed to follow a rest pattern.
17193 if (rest != NULL) {
17194 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17195 }
17196
17197 break;
17198 }
17199
17200 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17201 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17202
17203 if (rest == NULL) {
17204 rest = assoc;
17205 } else {
17206 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17207 pm_node_list_append(&assocs, assoc);
17208 }
17209 } else {
17210 pm_node_t *key;
17211
17212 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17213 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17214
17215 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
17216 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17217 } else if (!pm_symbol_node_label_p(key)) {
17218 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17219 }
17220 } else {
17221 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17222 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17223 }
17224
17225 parse_pattern_hash_key(parser, &keys, key);
17226 pm_node_t *value = NULL;
17227
17228 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17229 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17230 } else {
17231 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17232 }
17233
17234 pm_token_t operator = not_provided(parser);
17235 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17236
17237 if (rest != NULL) {
17238 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17239 }
17240
17241 pm_node_list_append(&assocs, assoc);
17242 }
17243 }
17244
17245 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17246 xfree(assocs.nodes);
17247
17248 pm_static_literals_free(&keys);
17249 return node;
17250}
17251
17255static pm_node_t *
17256parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17257 switch (parser->current.type) {
17258 case PM_TOKEN_IDENTIFIER:
17259 case PM_TOKEN_METHOD_NAME: {
17260 parser_lex(parser);
17261 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17262
17263 int depth;
17264 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17265 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17266 }
17267
17268 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17269 return (pm_node_t *) pm_local_variable_target_node_create(
17270 parser,
17271 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17272 constant_id,
17273 (uint32_t) (depth == -1 ? 0 : depth)
17274 );
17275 }
17276 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17277 pm_token_t opening = parser->current;
17278 parser_lex(parser);
17279
17280 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17281 // If we have an empty array pattern, then we'll just return a new
17282 // array pattern node.
17283 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17284 }
17285
17286 // Otherwise, we'll parse the inner pattern, then deal with it depending
17287 // on the type it returns.
17288 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17289
17290 accept1(parser, PM_TOKEN_NEWLINE);
17291 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17292 pm_token_t closing = parser->previous;
17293
17294 switch (PM_NODE_TYPE(inner)) {
17295 case PM_ARRAY_PATTERN_NODE: {
17296 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17297 if (pattern_node->opening_loc.start == NULL) {
17298 pattern_node->base.location.start = opening.start;
17299 pattern_node->base.location.end = closing.end;
17300
17301 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17302 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17303
17304 return (pm_node_t *) pattern_node;
17305 }
17306
17307 break;
17308 }
17309 case PM_FIND_PATTERN_NODE: {
17310 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17311 if (pattern_node->opening_loc.start == NULL) {
17312 pattern_node->base.location.start = opening.start;
17313 pattern_node->base.location.end = closing.end;
17314
17315 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17316 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17317
17318 return (pm_node_t *) pattern_node;
17319 }
17320
17321 break;
17322 }
17323 default:
17324 break;
17325 }
17326
17327 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17328 pm_array_pattern_node_requireds_append(node, inner);
17329 return (pm_node_t *) node;
17330 }
17331 case PM_TOKEN_BRACE_LEFT: {
17332 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17333 parser->pattern_matching_newlines = false;
17334
17336 pm_token_t opening = parser->current;
17337 parser_lex(parser);
17338
17339 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17340 // If we have an empty hash pattern, then we'll just return a new hash
17341 // pattern node.
17342 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17343 } else {
17344 pm_node_t *first_node;
17345
17346 switch (parser->current.type) {
17347 case PM_TOKEN_LABEL:
17348 parser_lex(parser);
17349 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17350 break;
17351 case PM_TOKEN_USTAR_STAR:
17352 first_node = parse_pattern_keyword_rest(parser, captures);
17353 break;
17354 case PM_TOKEN_STRING_BEGIN:
17355 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17356 break;
17357 default: {
17358 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17359 parser_lex(parser);
17360
17361 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17362 break;
17363 }
17364 }
17365
17366 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17367
17368 accept1(parser, PM_TOKEN_NEWLINE);
17369 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17370 pm_token_t closing = parser->previous;
17371
17372 node->base.location.start = opening.start;
17373 node->base.location.end = closing.end;
17374
17375 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17376 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17377 }
17378
17379 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17380 return (pm_node_t *) node;
17381 }
17382 case PM_TOKEN_UDOT_DOT:
17383 case PM_TOKEN_UDOT_DOT_DOT: {
17384 pm_token_t operator = parser->current;
17385 parser_lex(parser);
17386
17387 // Since we have a unary range operator, we need to parse the subsequent
17388 // expression as the right side of the range.
17389 switch (parser->current.type) {
17390 case PM_CASE_PRIMITIVE: {
17391 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17392 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17393 }
17394 default: {
17395 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17396 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17397 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17398 }
17399 }
17400 }
17401 case PM_CASE_PRIMITIVE: {
17402 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17403
17404 // If we found a label, we need to immediately return to the caller.
17405 if (pm_symbol_node_label_p(node)) return node;
17406
17407 // Call nodes (arithmetic operations) are not allowed in patterns
17408 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17409 pm_parser_err_node(parser, node, diag_id);
17410 pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
17411 pm_node_destroy(parser, node);
17412 return (pm_node_t *) missing_node;
17413 }
17414
17415 // Now that we have a primitive, we need to check if it's part of a range.
17416 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17417 pm_token_t operator = parser->previous;
17418
17419 // Now that we have the operator, we need to check if this is followed
17420 // by another expression. If it is, then we will create a full range
17421 // node. Otherwise, we'll create an endless range.
17422 switch (parser->current.type) {
17423 case PM_CASE_PRIMITIVE: {
17424 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17425 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17426 }
17427 default:
17428 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17429 }
17430 }
17431
17432 return node;
17433 }
17434 case PM_TOKEN_CARET: {
17435 parser_lex(parser);
17436 pm_token_t operator = parser->previous;
17437
17438 // At this point we have a pin operator. We need to check the subsequent
17439 // expression to determine if it's a variable or an expression.
17440 switch (parser->current.type) {
17441 case PM_TOKEN_IDENTIFIER: {
17442 parser_lex(parser);
17443 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17444
17445 if (variable == NULL) {
17446 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17447 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17448 }
17449
17450 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17451 }
17452 case PM_TOKEN_INSTANCE_VARIABLE: {
17453 parser_lex(parser);
17454 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17455
17456 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17457 }
17458 case PM_TOKEN_CLASS_VARIABLE: {
17459 parser_lex(parser);
17460 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17461
17462 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17463 }
17464 case PM_TOKEN_GLOBAL_VARIABLE: {
17465 parser_lex(parser);
17466 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17467
17468 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17469 }
17470 case PM_TOKEN_NUMBERED_REFERENCE: {
17471 parser_lex(parser);
17472 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17473
17474 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17475 }
17476 case PM_TOKEN_BACK_REFERENCE: {
17477 parser_lex(parser);
17478 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17479
17480 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17481 }
17482 case PM_TOKEN_PARENTHESIS_LEFT: {
17483 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17484 parser->pattern_matching_newlines = false;
17485
17486 pm_token_t lparen = parser->current;
17487 parser_lex(parser);
17488
17489 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17490 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17491
17492 accept1(parser, PM_TOKEN_NEWLINE);
17493 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17494 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17495 }
17496 default: {
17497 // If we get here, then we have a pin operator followed by something
17498 // not understood. We'll create a missing node and return that.
17499 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17500 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17501 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17502 }
17503 }
17504 }
17505 case PM_TOKEN_UCOLON_COLON: {
17506 pm_token_t delimiter = parser->current;
17507 parser_lex(parser);
17508
17509 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17510 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17511
17512 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17513 }
17514 case PM_TOKEN_CONSTANT: {
17515 pm_token_t constant = parser->current;
17516 parser_lex(parser);
17517
17518 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17519 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17520 }
17521 default:
17522 pm_parser_err_current(parser, diag_id);
17523 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17524 }
17525}
17526
17531static pm_node_t *
17532parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17533 pm_node_t *node = first_node;
17534
17535 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
17536 pm_token_t operator = parser->previous;
17537
17538 switch (parser->current.type) {
17539 case PM_TOKEN_IDENTIFIER:
17540 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17541 case PM_TOKEN_BRACE_LEFT:
17542 case PM_TOKEN_CARET:
17543 case PM_TOKEN_CONSTANT:
17544 case PM_TOKEN_UCOLON_COLON:
17545 case PM_TOKEN_UDOT_DOT:
17546 case PM_TOKEN_UDOT_DOT_DOT:
17547 case PM_CASE_PRIMITIVE: {
17548 if (node == NULL) {
17549 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17550 } else {
17551 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17552 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17553 }
17554
17555 break;
17556 }
17557 case PM_TOKEN_PARENTHESIS_LEFT:
17558 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17559 pm_token_t opening = parser->current;
17560 parser_lex(parser);
17561
17562 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17563 accept1(parser, PM_TOKEN_NEWLINE);
17564 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17565 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
17566
17567 if (node == NULL) {
17568 node = right;
17569 } else {
17570 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17571 }
17572
17573 break;
17574 }
17575 default: {
17576 pm_parser_err_current(parser, diag_id);
17577 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17578
17579 if (node == NULL) {
17580 node = right;
17581 } else {
17582 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17583 }
17584
17585 break;
17586 }
17587 }
17588 }
17589
17590 // If we have an =>, then we are assigning this pattern to a variable.
17591 // In this case we should create an assignment node.
17592 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17593 pm_token_t operator = parser->previous;
17594 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17595
17596 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17597 int depth;
17598
17599 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17600 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17601 }
17602
17603 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17604 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17605 parser,
17606 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17607 constant_id,
17608 (uint32_t) (depth == -1 ? 0 : depth)
17609 );
17610
17611 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17612 }
17613
17614 return node;
17615}
17616
/**
 * Parse a pattern, dispatching on the current token and on `flags`.
 *
 * - A label or a `**` starts a brace-less hash pattern. Unless `flags`
 *   contains PM_PARSE_PATTERN_TOP, PM_ERR_PATTERN_HASH_IMPLICIT is reported on
 *   the resulting node.
 * - A string beginning is parsed as a primitive; if that yields a label symbol
 *   it is treated as the first key of a hash pattern, otherwise parsing
 *   continues as a chain of primitives.
 * - A leading `*` is parsed as a rest pattern when `flags` contains
 *   PM_PARSE_PATTERN_TOP or PM_PARSE_PATTERN_MULTI.
 * - Anything else is parsed as a chain of primitives.
 *
 * When `flags` contains PM_PARSE_PATTERN_MULTI and a comma follows, the
 * comma-separated patterns are gathered into either a find pattern or an
 * array pattern.
 *
 * `diag_id` is forwarded to the primitive parsers, and nested parse calls
 * receive `depth + 1`.
 */
17620static pm_node_t *
17621parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17622 pm_node_t *node = NULL;
17623
      // Track whether a `*` rest pattern was seen as the first element and
      // whether a rest (explicit or implicit) was seen after the first comma.
17624 bool leading_rest = false;
17625 bool trailing_rest = false;
17626
17627 switch (parser->current.type) {
17628 case PM_TOKEN_LABEL: {
      // A label begins a hash pattern written without braces.
17629 parser_lex(parser);
17630 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17631 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17632
17633 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17634 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17635 }
17636
17637 return node;
17638 }
17639 case PM_TOKEN_USTAR_STAR: {
      // A `**` keyword rest also begins a hash pattern written without
      // braces.
17640 node = parse_pattern_keyword_rest(parser, captures);
17641 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17642
17643 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17644 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17645 }
17646
17647 return node;
17648 }
17649 case PM_TOKEN_STRING_BEGIN: {
17650 // We need special handling for string beginnings because they could
17651 // be dynamic symbols leading to hash patterns.
17652 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17653
17654 if (pm_symbol_node_label_p(node)) {
17655 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17656
17657 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17658 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17659 }
17660
17661 return node;
17662 }
17663
      // Not a label: hand the already-parsed primitive over so that any
      // `|` alternations and `=>` captures that follow are picked up.
17664 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17665 break;
17666 }
17667 case PM_TOKEN_USTAR: {
17668 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17669 parser_lex(parser);
17670 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17671 leading_rest = true;
17672 break;
17673 }
17674 }
      // When a leading `*` is not permitted by the flags, control falls
      // through into the default case below.
17676 default:
17677 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17678 break;
17679 }
17680
17681 // If we got a dynamic label symbol, then we need to treat it like the
17682 // beginning of a hash pattern.
17683 if (pm_symbol_node_label_p(node)) {
17684 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17685 }
17686
17687 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17688 // If we have a comma, then we are now parsing either an array pattern
17689 // or a find pattern. We need to parse all of the patterns, put them
17690 // into a big list, and then determine which type of node we have.
17691 pm_node_list_t nodes = { 0 };
17692 pm_node_list_append(&nodes, node);
17693
17694 // Gather up all of the patterns into the list.
17695 while (accept1(parser, PM_TOKEN_COMMA)) {
17696 // Break early here in case we have a trailing comma.
17697 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
      // The trailing comma is recorded as an implicit rest node
      // created from the comma token itself.
17698 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17699 pm_node_list_append(&nodes, node);
17700 trailing_rest = true;
17701 break;
17702 }
17703
17704 if (accept1(parser, PM_TOKEN_USTAR)) {
17705 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17706
17707 // If we have already parsed a splat pattern, then this is an
17708 // error. We will continue to parse the rest of the patterns,
17709 // but we will indicate it as an error.
17710 if (trailing_rest) {
17711 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17712 }
17713
17714 trailing_rest = true;
17715 } else {
17716 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17717 }
17718
17719 pm_node_list_append(&nodes, node);
17720 }
17721
17722 // If the first pattern and the last pattern are rest patterns, then we
17723 // will call this a find pattern, regardless of how many rest patterns
17724 // are in between because we know we already added the appropriate
17725 // errors. Otherwise we will create an array pattern.
17726 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17727 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17728
      // Exactly two nodes means there is nothing between the leading
      // and trailing rests, which is reported as an error.
17729 if (nodes.size == 2) {
17730 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17731 }
17732 } else {
17733 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17734
17735 if (leading_rest && trailing_rest) {
17736 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17737 }
17738 }
17739
      // Only the list's backing array is released here, not the nodes it
      // points to.
17740 xfree(nodes.nodes);
17741 } else if (leading_rest) {
17742 // Otherwise, if we parsed a single splat pattern, then we know we have
17743 // an array pattern, so we can go ahead and create that node.
17744 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17745 }
17746
17747 return node;
17748}
17749
17755static inline void
17756parse_negative_numeric(pm_node_t *node) {
17757 switch (PM_NODE_TYPE(node)) {
17758 case PM_INTEGER_NODE: {
17759 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17760 cast->base.location.start--;
17761 cast->value.negative = true;
17762 break;
17763 }
17764 case PM_FLOAT_NODE: {
17765 pm_float_node_t *cast = (pm_float_node_t *) node;
17766 cast->base.location.start--;
17767 cast->value = -cast->value;
17768 break;
17769 }
17770 case PM_RATIONAL_NODE: {
17771 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17772 cast->base.location.start--;
17773 cast->numerator.negative = true;
17774 break;
17775 }
17776 case PM_IMAGINARY_NODE:
17777 node->location.start--;
17778 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17779 break;
17780 default:
17781 assert(false && "unreachable");
17782 break;
17783 }
17784}
17785
17791static void
17792pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17793 switch (diag_id) {
17794 case PM_ERR_HASH_KEY: {
17795 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17796 break;
17797 }
17798 case PM_ERR_HASH_VALUE:
17799 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17800 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17801 break;
17802 }
17803 case PM_ERR_UNARY_RECEIVER: {
17804 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17805 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17806 break;
17807 }
17808 case PM_ERR_UNARY_DISALLOWED:
17809 case PM_ERR_EXPECT_ARGUMENT: {
17810 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17811 break;
17812 }
17813 default:
17814 pm_parser_err_previous(parser, diag_id);
17815 break;
17816 }
17817}
17818
/**
 * Check whether the `retry` represented by `node` appears somewhere it is
 * allowed, and append an error to the parser if it does not.
 *
 * The parser's context list is walked from the current context outwards via
 * `prev`. The walk stops without an error at the first context in which a
 * retry is permitted. It stops with an error at the first context that closes
 * off the search; which error is reported depends on whether an else or an
 * ensure context was passed through on the way (tracked in `context`).
 *
 * NOTE(review): this listing is missing several lines of the switch (the
 * embedded line numbers jump, e.g. 17832 -> 17840 and 17860 -> 17868), so some
 * `case` labels -- including the ones guarding the two `context = ...`
 * assignments -- are not visible here. The code below is left exactly as
 * listed apart from the #undef fix at the end.
 *
 * @param parser The parser whose context list is inspected and that receives
 *               any error.
 * @param node   The node the error is attached to.
 */
17822static void
17823parse_retry(pm_parser_t *parser, const pm_node_t *node) {
      // Function-local state values recording what has been passed through
      // while walking up the contexts.
17824#define CONTEXT_NONE 0
17825#define CONTEXT_THROUGH_ENSURE 1
17826#define CONTEXT_THROUGH_ELSE 2
17827
17828 pm_context_node_t *context_node = parser->current_context;
17829 int context = CONTEXT_NONE;
17830
17831 while (context_node != NULL) {
17832 switch (context_node->context) {
17840 case PM_CONTEXT_DEFINED:
17842 // These are the good cases. We're allowed to have a retry here.
17843 return;
17844 case PM_CONTEXT_CLASS:
17845 case PM_CONTEXT_DEF:
17847 case PM_CONTEXT_MAIN:
17848 case PM_CONTEXT_MODULE:
17849 case PM_CONTEXT_PREEXE:
17850 case PM_CONTEXT_SCLASS:
17851 // These are the bad cases. We're not allowed to have a retry in
17852 // these contexts.
17853 if (context == CONTEXT_NONE) {
17854 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17855 } else if (context == CONTEXT_THROUGH_ENSURE) {
17856 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17857 } else if (context == CONTEXT_THROUGH_ELSE) {
17858 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17859 }
17860 return;
17868 // These are also bad cases, but with a more specific error
17869 // message indicating the else.
17870 context = CONTEXT_THROUGH_ELSE;
17871 break;
17879 // These are also bad cases, but with a more specific error
17880 // message indicating the ensure.
17881 context = CONTEXT_THROUGH_ENSURE;
17882 break;
17883 case PM_CONTEXT_NONE:
17884 // This case should never happen.
17885 assert(false && "unreachable");
17886 break;
17887 case PM_CONTEXT_BEGIN:
17890 case PM_CONTEXT_CASE_IN:
17893 case PM_CONTEXT_ELSE:
17894 case PM_CONTEXT_ELSIF:
17895 case PM_CONTEXT_EMBEXPR:
17897 case PM_CONTEXT_FOR:
17898 case PM_CONTEXT_IF:
17903 case PM_CONTEXT_PARENS:
17904 case PM_CONTEXT_POSTEXE:
17906 case PM_CONTEXT_TERNARY:
17907 case PM_CONTEXT_UNLESS:
17908 case PM_CONTEXT_UNTIL:
17909 case PM_CONTEXT_WHILE:
17910 // In these contexts we should continue walking up the list of
17911 // contexts.
17912 break;
17913 }
17914
17915 context_node = context_node->prev;
17916 }
17917
      // Undefine exactly the names defined at the top of the function so that
      // none of them remains defined for the rest of the file.
17918#undef CONTEXT_NONE
17919#undef CONTEXT_THROUGH_ENSURE
17920#undef CONTEXT_THROUGH_ELSE
17921}
17922
/**
 * Check whether the `yield` represented by `node` appears somewhere it is
 * allowed, and append PM_ERR_INVALID_YIELD to the parser if it does not.
 *
 * The parser's context list is walked from the current context outwards via
 * `prev`, stopping at the first context that either permits or forbids a
 * yield. If the list is exhausted, nothing is reported.
 *
 * NOTE(review): this listing is missing several `case` labels of the switch
 * (the embedded line numbers jump, e.g. 17934 -> 17938 and 17962 -> 17971), so
 * the groups below are not the complete sets of contexts.
 */
17926static void
17927parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17928 pm_context_node_t *context_node = parser->current_context;
17929
17930 while (context_node != NULL) {
17931 switch (context_node->context) {
17932 case PM_CONTEXT_DEF:
17934 case PM_CONTEXT_DEFINED:
17938 // These are the good cases. We're allowed to have a yield in
17939 // these contexts.
17940 return;
17941 case PM_CONTEXT_CLASS:
17945 case PM_CONTEXT_MAIN:
17946 case PM_CONTEXT_MODULE:
17950 case PM_CONTEXT_SCLASS:
17954 // These are the bad cases. We're not allowed to have a yield in
17955 // these contexts.
17956 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17957 return;
17958 case PM_CONTEXT_NONE:
17959 // This case should never happen.
17960 assert(false && "unreachable");
17961 break;
17962 case PM_CONTEXT_BEGIN:
17971 case PM_CONTEXT_CASE_IN:
17974 case PM_CONTEXT_ELSE:
17975 case PM_CONTEXT_ELSIF:
17976 case PM_CONTEXT_EMBEXPR:
17978 case PM_CONTEXT_FOR:
17979 case PM_CONTEXT_IF:
17987 case PM_CONTEXT_PARENS:
17988 case PM_CONTEXT_POSTEXE:
17990 case PM_CONTEXT_PREEXE:
17992 case PM_CONTEXT_TERNARY:
17993 case PM_CONTEXT_UNLESS:
17994 case PM_CONTEXT_UNTIL:
17995 case PM_CONTEXT_WHILE:
17996 // In these contexts we should continue walking up the list of
17997 // contexts.
17998 break;
17999 }
18000
18001 context_node = context_node->prev;
18002 }
18003}
18004
/**
 * Data handed to the regular expression error callback through its `void *`
 * argument.
 *
 * NOTE(review): this listing is missing lines of the definition (the embedded
 * line numbers jump from 18009 to 18012 and from 18018 to 18027), so the
 * closing line that names the type is not visible. The functions that follow
 * also read `parser` and `shared` members; presumably those are declared on
 * the missing lines -- confirm against the full file.
 */
18009typedef struct {
18012
      // Start of the location used for the error when the callback's own
      // start/end arguments are not used.
18014 const uint8_t *start;
18015
      // End of that same location.
18017 const uint8_t *end;
18018
18027
/**
 * Error callback passed to pm_regexp_parse: appends a
 * PM_ERR_REGEXP_PARSE_ERROR carrying `message` to the parser held in the
 * callback data.
 *
 * When the callback data is flagged as `shared`, the error is located at the
 * `start`/`end` range given to this callback. Otherwise it is located at the
 * range stored in the callback data.
 *
 * NOTE(review): the line that declares `callback_data` (18034) is missing
 * from this listing; presumably it converts `data` to a pointer to the
 * callback data struct -- confirm against the full file.
 */
18032static void
18033parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18035 pm_location_t location;
18036
18037 if (callback_data->shared) {
18038 location = (pm_location_t) { .start = start, .end = end };
18039 } else {
18040 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18041 }
18042
18043 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18044}
18045
/**
 * Run pm_regexp_parse over the unescaped content of a regular expression
 * node, with parse_regular_expression_error as the error callback.
 *
 * The callback data records the parser, the node's own location, and whether
 * the unescaped string is of type PM_STRING_SHARED. The callback uses that
 * flag to choose between the range it is handed and the node's location.
 *
 * NOTE(review): the line that opens the `error_data` initializer (18052) is
 * missing from this listing.
 */
18049static void
18050parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18051 const pm_string_t *unescaped = &node->unescaped;
18053 .parser = parser,
18054 .start = node->base.location.start,
18055 .end = node->base.location.end,
18056 .shared = unescaped->type == PM_STRING_SHARED
18057 };
18058
      // The fourth argument is whether the node carries the extended flag.
      // The two NULL arguments are not explained by this listing (presumably
      // an optional callback and its data -- confirm against the declaration
      // of pm_regexp_parse).
18059 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18060}
18061
18065static inline pm_node_t *
18066parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18067 switch (parser->current.type) {
18068 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
18069 parser_lex(parser);
18070
18071 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18072 pm_accepts_block_stack_push(parser, true);
18073 bool parsed_bare_hash = false;
18074
18075 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18076 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18077
18078 // Handle the case where we don't have a comma and we have a
18079 // newline followed by a right bracket.
18080 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18081 break;
18082 }
18083
18084 // Ensure that we have a comma between elements in the array.
18085 if (array->elements.size > 0) {
18086 if (accept1(parser, PM_TOKEN_COMMA)) {
18087 // If there was a comma but we also accepts a newline,
18088 // then this is a syntax error.
18089 if (accepted_newline) {
18090 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18091 }
18092 } else {
18093 // If there was no comma, then we need to add a syntax
18094 // error.
18095 const uint8_t *location = parser->previous.end;
18096 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18097
18098 parser->previous.start = location;
18099 parser->previous.type = PM_TOKEN_MISSING;
18100 }
18101 }
18102
18103 // If we have a right bracket immediately following a comma,
18104 // this is allowed since it's a trailing comma. In this case we
18105 // can break out of the loop.
18106 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18107
18108 pm_node_t *element;
18109
18110 if (accept1(parser, PM_TOKEN_USTAR)) {
18111 pm_token_t operator = parser->previous;
18112 pm_node_t *expression = NULL;
18113
18114 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18115 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18116 } else {
18117 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18118 }
18119
18120 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18121 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18122 if (parsed_bare_hash) {
18123 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18124 }
18125
18126 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18127 pm_static_literals_t hash_keys = { 0 };
18128
18129 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
18130 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18131 }
18132
18133 pm_static_literals_free(&hash_keys);
18134 parsed_bare_hash = true;
18135 } else {
18136 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18137
18138 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18139 if (parsed_bare_hash) {
18140 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18141 }
18142
18143 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18144 pm_static_literals_t hash_keys = { 0 };
18145 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18146
18147 pm_token_t operator;
18148 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18149 operator = parser->previous;
18150 } else {
18151 operator = not_provided(parser);
18152 }
18153
18154 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18155 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18156 pm_keyword_hash_node_elements_append(hash, assoc);
18157
18158 element = (pm_node_t *) hash;
18159 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18160 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18161 }
18162
18163 pm_static_literals_free(&hash_keys);
18164 parsed_bare_hash = true;
18165 }
18166 }
18167
18168 pm_array_node_elements_append(array, element);
18169 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18170 }
18171
18172 accept1(parser, PM_TOKEN_NEWLINE);
18173
18174 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18175 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18176 parser->previous.start = parser->previous.end;
18177 parser->previous.type = PM_TOKEN_MISSING;
18178 }
18179
18180 pm_array_node_close_set(array, &parser->previous);
18181 pm_accepts_block_stack_pop(parser);
18182
18183 return (pm_node_t *) array;
18184 }
18185 case PM_TOKEN_PARENTHESIS_LEFT:
18186 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
18187 pm_token_t opening = parser->current;
18188 pm_node_flags_t flags = 0;
18189
18190 pm_node_list_t current_block_exits = { 0 };
18191 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18192
18193 parser_lex(parser);
18194 while (true) {
18195 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18196 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18197 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18198 break;
18199 }
18200 }
18201
18202 // If this is the end of the file or we match a right parenthesis, then
18203 // we have an empty parentheses node, and we can immediately return.
18204 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18205 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18206
18207 pop_block_exits(parser, previous_block_exits);
18208 pm_node_list_free(&current_block_exits);
18209
18210 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
18211 }
18212
18213 // Otherwise, we're going to parse the first statement in the list
18214 // of statements within the parentheses.
18215 pm_accepts_block_stack_push(parser, true);
18216 context_push(parser, PM_CONTEXT_PARENS);
18217 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18218 context_pop(parser);
18219
18220 // Determine if this statement is followed by a terminator. In the
18221 // case of a single statement, this is fine. But in the case of
18222 // multiple statements it's required.
18223 bool terminator_found = false;
18224
18225 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18226 terminator_found = true;
18227 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18228 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18229 terminator_found = true;
18230 }
18231
18232 if (terminator_found) {
18233 while (true) {
18234 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18235 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18236 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18237 break;
18238 }
18239 }
18240 }
18241
18242 // If we hit a right parenthesis, then we're done parsing the
18243 // parentheses node, and we can check which kind of node we should
18244 // return.
18245 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18246 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18247 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18248 }
18249
18250 parser_lex(parser);
18251 pm_accepts_block_stack_pop(parser);
18252
18253 pop_block_exits(parser, previous_block_exits);
18254 pm_node_list_free(&current_block_exits);
18255
18256 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18257 // If we have a single statement and are ending on a right
18258 // parenthesis, then we need to check if this is possibly a
18259 // multiple target node.
18260 pm_multi_target_node_t *multi_target;
18261
18262 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18263 multi_target = (pm_multi_target_node_t *) statement;
18264 } else {
18265 multi_target = pm_multi_target_node_create(parser);
18266 pm_multi_target_node_targets_append(parser, multi_target, statement);
18267 }
18268
18269 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18270 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18271
18272 multi_target->lparen_loc = lparen_loc;
18273 multi_target->rparen_loc = rparen_loc;
18274 multi_target->base.location.start = lparen_loc.start;
18275 multi_target->base.location.end = rparen_loc.end;
18276
18277 pm_node_t *result;
18278 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18279 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18280 accept1(parser, PM_TOKEN_NEWLINE);
18281 } else {
18282 result = (pm_node_t *) multi_target;
18283 }
18284
18285 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18286 // All set, this is explicitly allowed by the parent
18287 // context.
18288 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18289 // All set, we're inside a for loop and we're parsing
18290 // multiple targets.
18291 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18292 // Multi targets are not allowed when it's not a
18293 // statement level.
18294 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18295 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18296 // Multi targets must be followed by an equal sign in
18297 // order to be valid (or a right parenthesis if they are
18298 // nested).
18299 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18300 }
18301
18302 return result;
18303 }
18304
18305 // If we have a single statement and are ending on a right parenthesis
18306 // and we didn't return a multiple assignment node, then we can return a
18307 // regular parentheses node now.
18308 pm_statements_node_t *statements = pm_statements_node_create(parser);
18309 pm_statements_node_body_append(parser, statements, statement, true);
18310
18311 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18312 }
18313
18314 // If we have more than one statement in the set of parentheses,
18315 // then we are going to parse all of them as a list of statements.
18316 // We'll do that here.
18317 context_push(parser, PM_CONTEXT_PARENS);
18318 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18319
18320 pm_statements_node_t *statements = pm_statements_node_create(parser);
18321 pm_statements_node_body_append(parser, statements, statement, true);
18322
18323 // If we didn't find a terminator and we didn't find a right
18324 // parenthesis, then this is a syntax error.
18325 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18326 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18327 }
18328
18329 // Parse each statement within the parentheses.
18330 while (true) {
18331 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18332 pm_statements_node_body_append(parser, statements, node, true);
18333
18334 // If we're recovering from a syntax error, then we need to stop
18335 // parsing the statements now.
18336 if (parser->recovering) {
18337 // If this is the level of context where the recovery has
18338 // happened, then we can mark the parser as done recovering.
18339 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18340 break;
18341 }
18342
18343 // If we couldn't parse an expression at all, then we need to
18344 // bail out of the loop.
18345 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18346
18347 // If we successfully parsed a statement, then we are going to
18348 // need terminator to delimit them.
18349 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18350 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18351 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18352 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18353 break;
18354 } else if (!match1(parser, PM_TOKEN_EOF)) {
18355 // If we're at the end of the file, then we're going to add
18356 // an error after this for the ) anyway.
18357 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18358 }
18359 }
18360
18361 context_pop(parser);
18362 pm_accepts_block_stack_pop(parser);
18363 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18364
18365 // When we're parsing multi targets, we allow them to be followed by
18366 // a right parenthesis if they are at the statement level. This is
18367 // only possible if they are the final statement in a parentheses.
18368 // We need to explicitly reject that here.
18369 {
18370 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18371
18372 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18373 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18374 pm_multi_target_node_targets_append(parser, multi_target, statement);
18375
18376 statement = (pm_node_t *) multi_target;
18377 statements->body.nodes[statements->body.size - 1] = statement;
18378 }
18379
18380 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18381 const uint8_t *offset = statement->location.end;
18382 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18383 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18384
18385 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18386 statements->body.nodes[statements->body.size - 1] = statement;
18387
18388 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18389 }
18390 }
18391
18392 pop_block_exits(parser, previous_block_exits);
18393 pm_node_list_free(&current_block_exits);
18394
18395 pm_void_statements_check(parser, statements, true);
18396 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
18397 }
18398 case PM_TOKEN_BRACE_LEFT: {
18399 // If we were passed a current_hash_keys via the parser, then that
18400 // means we're already parsing a hash and we want to share the set
18401 // of hash keys with this inner hash we're about to parse for the
18402 // sake of warnings. We'll set it to NULL after we grab it to make
18403 // sure subsequent expressions don't use it. Effectively this is a
18404 // way of getting around passing it to every call to
18405 // parse_expression.
18406 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18407 parser->current_hash_keys = NULL;
18408
18409 pm_accepts_block_stack_push(parser, true);
18410 parser_lex(parser);
18411
18412 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18413
18414 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18415 if (current_hash_keys != NULL) {
18416 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18417 } else {
18418 pm_static_literals_t hash_keys = { 0 };
18419 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18420 pm_static_literals_free(&hash_keys);
18421 }
18422
18423 accept1(parser, PM_TOKEN_NEWLINE);
18424 }
18425
18426 pm_accepts_block_stack_pop(parser);
18427 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18428 pm_hash_node_closing_loc_set(node, &parser->previous);
18429
18430 return (pm_node_t *) node;
18431 }
18432 case PM_TOKEN_CHARACTER_LITERAL: {
18433 parser_lex(parser);
18434
18435 pm_token_t opening = parser->previous;
18436 opening.type = PM_TOKEN_STRING_BEGIN;
18437 opening.end = opening.start + 1;
18438
18439 pm_token_t content = parser->previous;
18440 content.type = PM_TOKEN_STRING_CONTENT;
18441 content.start = content.start + 1;
18442
18443 pm_token_t closing = not_provided(parser);
18444 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18445 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18446
18447 // Characters can be followed by strings in which case they are
18448 // automatically concatenated.
18449 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18450 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18451 }
18452
18453 return node;
18454 }
18455 case PM_TOKEN_CLASS_VARIABLE: {
18456 parser_lex(parser);
18457 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18458
18459 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18460 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18461 }
18462
18463 return node;
18464 }
18465 case PM_TOKEN_CONSTANT: {
18466 parser_lex(parser);
18467 pm_token_t constant = parser->previous;
18468
18469 // If a constant is immediately followed by parentheses, then this is in
18470 // fact a method call, not a constant read.
18471 if (
18472 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18473 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18474 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18475 match1(parser, PM_TOKEN_BRACE_LEFT)
18476 ) {
18477 pm_arguments_t arguments = { 0 };
18478 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18479 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18480 }
18481
18482 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18483
18484 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18485 // If we get here, then we have a comma immediately following a
18486 // constant, so we're going to parse this as a multiple assignment.
18487 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18488 }
18489
18490 return node;
18491 }
18492 case PM_TOKEN_UCOLON_COLON: {
18493 parser_lex(parser);
18494 pm_token_t delimiter = parser->previous;
18495
18496 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18497 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18498
18499 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18500 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18501 }
18502
18503 return node;
18504 }
18505 case PM_TOKEN_UDOT_DOT:
18506 case PM_TOKEN_UDOT_DOT_DOT: {
18507 pm_token_t operator = parser->current;
18508 parser_lex(parser);
18509
18510 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18511
18512 // Unary .. and ... are special because these are non-associative
18513 // operators that can also be unary operators. In this case we need
18514 // to explicitly reject code that has a .. or ... that follows this
18515 // expression.
18516 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18517 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18518 }
18519
18520 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18521 }
18522 case PM_TOKEN_FLOAT:
18523 parser_lex(parser);
18524 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18525 case PM_TOKEN_FLOAT_IMAGINARY:
18526 parser_lex(parser);
18527 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18528 case PM_TOKEN_FLOAT_RATIONAL:
18529 parser_lex(parser);
18530 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18531 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
18532 parser_lex(parser);
18533 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18534 case PM_TOKEN_NUMBERED_REFERENCE: {
18535 parser_lex(parser);
18536 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18537
18538 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18539 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18540 }
18541
18542 return node;
18543 }
18544 case PM_TOKEN_GLOBAL_VARIABLE: {
18545 parser_lex(parser);
18546 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18547
18548 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18549 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18550 }
18551
18552 return node;
18553 }
18554 case PM_TOKEN_BACK_REFERENCE: {
18555 parser_lex(parser);
18556 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18557
18558 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18559 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18560 }
18561
18562 return node;
18563 }
18564 case PM_TOKEN_IDENTIFIER:
18565 case PM_TOKEN_METHOD_NAME: {
18566 parser_lex(parser);
18567 pm_token_t identifier = parser->previous;
18568 pm_node_t *node = parse_variable_call(parser);
18569
18570 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18571 // If parse_variable_call returned with a call node, then we
18572 // know the identifier is not in the local table. In that case
18573 // we need to check if there are arguments following the
18574 // identifier.
18575 pm_call_node_t *call = (pm_call_node_t *) node;
18576 pm_arguments_t arguments = { 0 };
18577
18578 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18579 // Since we found arguments, we need to turn off the
18580 // variable call bit in the flags.
18581 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18582
18583 call->opening_loc = arguments.opening_loc;
18584 call->arguments = arguments.arguments;
18585 call->closing_loc = arguments.closing_loc;
18586 call->block = arguments.block;
18587
18588 if (arguments.block != NULL) {
18589 call->base.location.end = arguments.block->location.end;
18590 } else if (arguments.closing_loc.start == NULL) {
18591 if (arguments.arguments != NULL) {
18592 call->base.location.end = arguments.arguments->base.location.end;
18593 } else {
18594 call->base.location.end = call->message_loc.end;
18595 }
18596 } else {
18597 call->base.location.end = arguments.closing_loc.end;
18598 }
18599 }
18600 } else {
18601 // Otherwise, we know the identifier is in the local table. This
18602 // can still be a method call if it is followed by arguments or
18603 // a block, so we need to check for that here.
18604 if (
18605 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18606 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18607 match1(parser, PM_TOKEN_BRACE_LEFT)
18608 ) {
18609 pm_arguments_t arguments = { 0 };
18610 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18611 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18612
18613 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18614 // If we're about to convert an 'it' implicit local
18615 // variable read into a method call, we need to remove
18616 // it from the list of implicit local variables.
18617 parse_target_implicit_parameter(parser, node);
18618 } else {
18619 // Otherwise, we're about to convert a regular local
18620 // variable read into a method call, in which case we
18621 // need to indicate that this was not a read for the
18622 // purposes of warnings.
18623 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18624
18625 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18626 parse_target_implicit_parameter(parser, node);
18627 } else {
18629 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18630 }
18631 }
18632
18633 pm_node_destroy(parser, node);
18634 return (pm_node_t *) fcall;
18635 }
18636 }
18637
18638 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18639 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18640 }
18641
18642 return node;
18643 }
18644 case PM_TOKEN_HEREDOC_START: {
18645 // Here we have found a heredoc. We'll parse it and add it to the
18646 // list of strings.
18647 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18648 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18649
18650 size_t common_whitespace = (size_t) -1;
18651 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18652
18653 parser_lex(parser);
18654 pm_token_t opening = parser->previous;
18655
18656 pm_node_t *node;
18657 pm_node_t *part;
18658
18659 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18660 // If we get here, then we have an empty heredoc. We'll create
18661 // an empty content token and return an empty string node.
18662 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18663 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18664
18665 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18666 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18667 } else {
18668 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18669 }
18670
18671 node->location.end = opening.end;
18672 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18673 // If we get here, then we tried to find something in the
18674 // heredoc but couldn't actually parse anything, so we'll just
18675 // return a missing node.
18676 //
18677 // parse_string_part handles its own errors, so there is no need
18678 // for us to add one here.
18679 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18680 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18681 // If we get here, then the part that we parsed was plain string
18682 // content and we're at the end of the heredoc, so we can return
18683 // just a string node with the heredoc opening and closing as
18684 // its opening and closing.
18685 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18686 pm_string_node_t *cast = (pm_string_node_t *) part;
18687
18688 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18689 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18690 cast->base.location = cast->opening_loc;
18691
18692 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18693 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18694 cast->base.type = PM_X_STRING_NODE;
18695 }
18696
18697 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18698 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18699 }
18700
18701 node = (pm_node_t *) cast;
18702 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18703 } else {
18704 // If we get here, then we have multiple parts in the heredoc,
18705 // so we'll need to create an interpolated string node to hold
18706 // them all.
18707 pm_node_list_t parts = { 0 };
18708 pm_node_list_append(&parts, part);
18709
18710 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18711 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18712 pm_node_list_append(&parts, part);
18713 }
18714 }
18715
18716 // Now that we have all of the parts, create the correct type of
18717 // interpolated node.
18718 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18719 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18720 cast->parts = parts;
18721
18722 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18723 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18724
18725 cast->base.location = cast->opening_loc;
18726 node = (pm_node_t *) cast;
18727 } else {
18728 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18729 pm_node_list_free(&parts);
18730
18731 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18732 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18733
18734 cast->base.location = cast->opening_loc;
18735 node = (pm_node_t *) cast;
18736 }
18737
18738 // If this is a heredoc that is indented with a ~, then we need
18739 // to dedent each line by the common leading whitespace.
18740 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18741 pm_node_list_t *nodes;
18742 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18743 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18744 } else {
18745 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18746 }
18747
18748 parse_heredoc_dedent(parser, nodes, common_whitespace);
18749 }
18750 }
18751
18752 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18753 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18754 }
18755
18756 return node;
18757 }
18758 case PM_TOKEN_INSTANCE_VARIABLE: {
18759 parser_lex(parser);
18760 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18761
18762 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18763 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18764 }
18765
18766 return node;
18767 }
18768 case PM_TOKEN_INTEGER: {
18769 pm_node_flags_t base = parser->integer_base;
18770 parser_lex(parser);
18771 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18772 }
18773 case PM_TOKEN_INTEGER_IMAGINARY: {
18774 pm_node_flags_t base = parser->integer_base;
18775 parser_lex(parser);
18776 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18777 }
18778 case PM_TOKEN_INTEGER_RATIONAL: {
18779 pm_node_flags_t base = parser->integer_base;
18780 parser_lex(parser);
18781 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18782 }
18783 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18784 pm_node_flags_t base = parser->integer_base;
18785 parser_lex(parser);
18786 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18787 }
18788 case PM_TOKEN_KEYWORD___ENCODING__:
18789 parser_lex(parser);
18790 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18791 case PM_TOKEN_KEYWORD___FILE__:
18792 parser_lex(parser);
18793 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18794 case PM_TOKEN_KEYWORD___LINE__:
18795 parser_lex(parser);
18796 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18797 case PM_TOKEN_KEYWORD_ALIAS: {
18798 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18799 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18800 }
18801
18802 parser_lex(parser);
18803 pm_token_t keyword = parser->previous;
18804
18805 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18806 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18807
18808 switch (PM_NODE_TYPE(new_name)) {
18809 case PM_BACK_REFERENCE_READ_NODE:
18810 case PM_NUMBERED_REFERENCE_READ_NODE:
18811 case PM_GLOBAL_VARIABLE_READ_NODE: {
18812 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18813 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18814 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18815 }
18816 } else {
18817 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18818 }
18819
18820 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18821 }
18822 case PM_SYMBOL_NODE:
18823 case PM_INTERPOLATED_SYMBOL_NODE: {
18824 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18825 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18826 }
18827 }
18829 default:
18830 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18831 }
18832 }
18833 case PM_TOKEN_KEYWORD_CASE: {
18834 size_t opening_newline_index = token_newline_index(parser);
18835 parser_lex(parser);
18836
18837 pm_token_t case_keyword = parser->previous;
18838 pm_node_t *predicate = NULL;
18839
18840 pm_node_list_t current_block_exits = { 0 };
18841 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18842
18843 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18844 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18845 predicate = NULL;
18846 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18847 predicate = NULL;
18848 } else if (!token_begins_expression_p(parser->current.type)) {
18849 predicate = NULL;
18850 } else {
18851 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18852 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18853 }
18854
18855 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18856 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18857 parser_lex(parser);
18858
18859 pop_block_exits(parser, previous_block_exits);
18860 pm_node_list_free(&current_block_exits);
18861
18862 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18863 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18864 }
18865
18866 // At this point we can create a case node, though we don't yet know
18867 // if it is a case-in or case-when node.
18868 pm_token_t end_keyword = not_provided(parser);
18869 pm_node_t *node;
18870
18871 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18872 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18873 pm_static_literals_t literals = { 0 };
18874
18875 // At this point we've seen a when keyword, so we know this is a
18876 // case-when node. We will continue to parse the when nodes
18877 // until we hit the end of the list.
18878 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18879 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18880 parser_lex(parser);
18881
18882 pm_token_t when_keyword = parser->previous;
18883 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18884
18885 do {
18886 if (accept1(parser, PM_TOKEN_USTAR)) {
18887 pm_token_t operator = parser->previous;
18888 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18889
18890 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18891 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18892
18893 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18894 } else {
18895 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18896 pm_when_node_conditions_append(when_node, condition);
18897
18898 // If we found a missing node, then this is a syntax
18899 // error and we should stop looping.
18900 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18901
18902 // If this is a string node, then we need to mark it
18903 // as frozen because when clause strings are frozen.
18904 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18905 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18906 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18907 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18908 }
18909
18910 pm_when_clause_static_literals_add(parser, &literals, condition);
18911 }
18912 } while (accept1(parser, PM_TOKEN_COMMA));
18913
18914 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18915 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18916 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18917 }
18918 } else {
18919 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18920 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18921 }
18922
18923 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18924 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18925 if (statements != NULL) {
18926 pm_when_node_statements_set(when_node, statements);
18927 }
18928 }
18929
18930 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18931 }
18932
18933 // If we didn't parse any conditions (in or when) then we need
18934 // to indicate that we have an error.
18935 if (case_node->conditions.size == 0) {
18936 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18937 }
18938
18939 pm_static_literals_free(&literals);
18940 node = (pm_node_t *) case_node;
18941 } else {
18942 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18943
18944 // If this is a case-match node (i.e., it is a pattern matching
18945 // case statement) then we must have a predicate.
18946 if (predicate == NULL) {
18947 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18948 }
18949
18950 // At this point we expect that we're parsing a case-in node. We
18951 // will continue to parse the in nodes until we hit the end of
18952 // the list.
18953 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18954 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18955
18956 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18957 parser->pattern_matching_newlines = true;
18958
18959 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18960 parser->command_start = false;
18961 parser_lex(parser);
18962
18963 pm_token_t in_keyword = parser->previous;
18964
18965 pm_constant_id_list_t captures = { 0 };
18966 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18967
18968 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18969 pm_constant_id_list_free(&captures);
18970
18971 // Since we're in the top-level of the case-in node we need
18972 // to check for guard clauses in the form of `if` or
18973 // `unless` statements.
18974 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18975 pm_token_t keyword = parser->previous;
18976 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18977 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18978 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18979 pm_token_t keyword = parser->previous;
18980 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18981 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18982 }
18983
18984 // Now we need to check for the terminator of the in node's
18985 // pattern. It can be a newline or semicolon optionally
18986 // followed by a `then` keyword.
18987 pm_token_t then_keyword;
18988 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18989 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18990 then_keyword = parser->previous;
18991 } else {
18992 then_keyword = not_provided(parser);
18993 }
18994 } else {
18995 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18996 then_keyword = parser->previous;
18997 }
18998
18999 // Now we can actually parse the statements associated with
19000 // the in node.
19001 pm_statements_node_t *statements;
19002 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19003 statements = NULL;
19004 } else {
19005 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
19006 }
19007
19008 // Now that we have the full pattern and statements, we can
19009 // create the node and attach it to the case node.
19010 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
19011 pm_case_match_node_condition_append(case_node, condition);
19012 }
19013
19014 // If we didn't parse any conditions (in or when) then we need
19015 // to indicate that we have an error.
19016 if (case_node->conditions.size == 0) {
19017 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
19018 }
19019
19020 node = (pm_node_t *) case_node;
19021 }
19022
19023 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19024 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
19025 pm_token_t else_keyword = parser->previous;
19026 pm_else_node_t *else_node;
19027
19028 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19029 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
19030 } else {
19031 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
19032 }
19033
19034 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19035 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
19036 } else {
19037 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
19038 }
19039 }
19040
19041 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
19042 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
19043
19044 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
19045 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
19046 } else {
19047 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
19048 }
19049
19050 pop_block_exits(parser, previous_block_exits);
19051 pm_node_list_free(&current_block_exits);
19052
19053 return node;
19054 }
19055 case PM_TOKEN_KEYWORD_BEGIN: {
19056 size_t opening_newline_index = token_newline_index(parser);
19057 parser_lex(parser);
19058
19059 pm_token_t begin_keyword = parser->previous;
19060 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19061
19062 pm_node_list_t current_block_exits = { 0 };
19063 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19064 pm_statements_node_t *begin_statements = NULL;
19065
19066 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19067 pm_accepts_block_stack_push(parser, true);
19068 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19069 pm_accepts_block_stack_pop(parser);
19070 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19071 }
19072
19073 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19074 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19075 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19076
19077 begin_node->base.location.end = parser->previous.end;
19078 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19079
19080 pop_block_exits(parser, previous_block_exits);
19081 pm_node_list_free(&current_block_exits);
19082
19083 return (pm_node_t *) begin_node;
19084 }
19085 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19086 pm_node_list_t current_block_exits = { 0 };
19087 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19088
19089 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19090 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19091 }
19092
19093 parser_lex(parser);
19094 pm_token_t keyword = parser->previous;
19095
19096 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19097 pm_token_t opening = parser->previous;
19098 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19099
19100 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19101 pm_context_t context = parser->current_context->context;
19102 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19103 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19104 }
19105
19106 flush_block_exits(parser, previous_block_exits);
19107 pm_node_list_free(&current_block_exits);
19108
19109 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19110 }
19111 case PM_TOKEN_KEYWORD_BREAK:
19112 case PM_TOKEN_KEYWORD_NEXT:
19113 case PM_TOKEN_KEYWORD_RETURN: {
19114 parser_lex(parser);
19115
19116 pm_token_t keyword = parser->previous;
19117 pm_arguments_t arguments = { 0 };
19118
19119 if (
19120 token_begins_expression_p(parser->current.type) ||
19121 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19122 ) {
19123 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19124
19125 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19126 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19127 }
19128 }
19129
19130 switch (keyword.type) {
19131 case PM_TOKEN_KEYWORD_BREAK: {
19132 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19133 if (!parser->partial_script) parse_block_exit(parser, node);
19134 return node;
19135 }
19136 case PM_TOKEN_KEYWORD_NEXT: {
19137 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19138 if (!parser->partial_script) parse_block_exit(parser, node);
19139 return node;
19140 }
19141 case PM_TOKEN_KEYWORD_RETURN: {
19142 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19143 parse_return(parser, node);
19144 return node;
19145 }
19146 default:
19147 assert(false && "unreachable");
19148 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19149 }
19150 }
19151 case PM_TOKEN_KEYWORD_SUPER: {
19152 parser_lex(parser);
19153
19154 pm_token_t keyword = parser->previous;
19155 pm_arguments_t arguments = { 0 };
19156 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19157
19158 if (
19159 arguments.opening_loc.start == NULL &&
19160 arguments.arguments == NULL &&
19161 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19162 ) {
19163 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19164 }
19165
19166 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19167 }
19168 case PM_TOKEN_KEYWORD_YIELD: {
19169 parser_lex(parser);
19170
19171 pm_token_t keyword = parser->previous;
19172 pm_arguments_t arguments = { 0 };
19173 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19174
19175 // It's possible that we've parsed a block argument through our
19176 // call to parse_arguments_list. If we found one, we should mark it
19177 // as invalid and destroy it, as we don't have a place for it on the
19178 // yield node.
19179 if (arguments.block != NULL) {
19180 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19181 pm_node_destroy(parser, arguments.block);
19182 arguments.block = NULL;
19183 }
19184
19185 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19186 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19187
19188 return node;
19189 }
19190 case PM_TOKEN_KEYWORD_CLASS: {
19191 size_t opening_newline_index = token_newline_index(parser);
19192 parser_lex(parser);
19193
19194 pm_token_t class_keyword = parser->previous;
19195 pm_do_loop_stack_push(parser, false);
19196
19197 pm_node_list_t current_block_exits = { 0 };
19198 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19199
19200 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19201 pm_token_t operator = parser->previous;
19202 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19203
19204 pm_parser_scope_push(parser, true);
19205 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19206 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19207 }
19208
19209 pm_node_t *statements = NULL;
19210 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19211 pm_accepts_block_stack_push(parser, true);
19212 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19213 pm_accepts_block_stack_pop(parser);
19214 }
19215
19216 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19217 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19218 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19219 } else {
19220 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19221 }
19222
19223 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19224
19225 pm_constant_id_list_t locals;
19226 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19227
19228 pm_parser_scope_pop(parser);
19229 pm_do_loop_stack_pop(parser);
19230
19231 flush_block_exits(parser, previous_block_exits);
19232 pm_node_list_free(&current_block_exits);
19233
19234 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19235 }
19236
19237 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19238 pm_token_t name = parser->previous;
19239 if (name.type != PM_TOKEN_CONSTANT) {
19240 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19241 }
19242
19243 pm_token_t inheritance_operator;
19244 pm_node_t *superclass;
19245
19246 if (match1(parser, PM_TOKEN_LESS)) {
19247 inheritance_operator = parser->current;
19248 lex_state_set(parser, PM_LEX_STATE_BEG);
19249
19250 parser->command_start = true;
19251 parser_lex(parser);
19252
19253 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19254 } else {
19255 inheritance_operator = not_provided(parser);
19256 superclass = NULL;
19257 }
19258
19259 pm_parser_scope_push(parser, true);
19260
19261 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19262 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19263 } else {
19264 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19265 }
19266 pm_node_t *statements = NULL;
19267
19268 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19269 pm_accepts_block_stack_push(parser, true);
19270 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19271 pm_accepts_block_stack_pop(parser);
19272 }
19273
19274 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19275 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19276 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19277 } else {
19278 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19279 }
19280
19281 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19282
19283 if (context_def_p(parser)) {
19284 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19285 }
19286
19287 pm_constant_id_list_t locals;
19288 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19289
19290 pm_parser_scope_pop(parser);
19291 pm_do_loop_stack_pop(parser);
19292
19293 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19294 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19295 }
19296
19297 pop_block_exits(parser, previous_block_exits);
19298 pm_node_list_free(&current_block_exits);
19299
19300 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19301 }
19302 case PM_TOKEN_KEYWORD_DEF: {
19303 pm_node_list_t current_block_exits = { 0 };
19304 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19305
19306 pm_token_t def_keyword = parser->current;
19307 size_t opening_newline_index = token_newline_index(parser);
19308
19309 pm_node_t *receiver = NULL;
19310 pm_token_t operator = not_provided(parser);
19311 pm_token_t name;
19312
19313 // This context is necessary for lexing `...` correctly in bare
19314 // params. It must be pushed before lexing the first param, so it
19315 // is pushed here.
19316 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19317 parser_lex(parser);
19318
19319 // This will be false if the method name is not a valid identifier
19320 // but could be followed by an operator.
19321 bool valid_name = true;
19322
19323 switch (parser->current.type) {
19324 case PM_CASE_OPERATOR:
19325 pm_parser_scope_push(parser, true);
19326 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19327 parser_lex(parser);
19328
19329 name = parser->previous;
19330 break;
19331 case PM_TOKEN_IDENTIFIER: {
19332 parser_lex(parser);
19333
19334 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19335 receiver = parse_variable_call(parser);
19336
19337 pm_parser_scope_push(parser, true);
19338 lex_state_set(parser, PM_LEX_STATE_FNAME);
19339 parser_lex(parser);
19340
19341 operator = parser->previous;
19342 name = parse_method_definition_name(parser);
19343 } else {
19344 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19345 pm_parser_scope_push(parser, true);
19346
19347 name = parser->previous;
19348 }
19349
19350 break;
19351 }
19352 case PM_TOKEN_INSTANCE_VARIABLE:
19353 case PM_TOKEN_CLASS_VARIABLE:
19354 case PM_TOKEN_GLOBAL_VARIABLE:
19355 valid_name = false;
19357 case PM_TOKEN_CONSTANT:
19358 case PM_TOKEN_KEYWORD_NIL:
19359 case PM_TOKEN_KEYWORD_SELF:
19360 case PM_TOKEN_KEYWORD_TRUE:
19361 case PM_TOKEN_KEYWORD_FALSE:
19362 case PM_TOKEN_KEYWORD___FILE__:
19363 case PM_TOKEN_KEYWORD___LINE__:
19364 case PM_TOKEN_KEYWORD___ENCODING__: {
19365 pm_parser_scope_push(parser, true);
19366 parser_lex(parser);
19367
19368 pm_token_t identifier = parser->previous;
19369
19370 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19371 lex_state_set(parser, PM_LEX_STATE_FNAME);
19372 parser_lex(parser);
19373 operator = parser->previous;
19374
19375 switch (identifier.type) {
19376 case PM_TOKEN_CONSTANT:
19377 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19378 break;
19379 case PM_TOKEN_INSTANCE_VARIABLE:
19380 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19381 break;
19382 case PM_TOKEN_CLASS_VARIABLE:
19383 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19384 break;
19385 case PM_TOKEN_GLOBAL_VARIABLE:
19386 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19387 break;
19388 case PM_TOKEN_KEYWORD_NIL:
19389 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19390 break;
19391 case PM_TOKEN_KEYWORD_SELF:
19392 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19393 break;
19394 case PM_TOKEN_KEYWORD_TRUE:
19395 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19396 break;
19397 case PM_TOKEN_KEYWORD_FALSE:
19398 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19399 break;
19400 case PM_TOKEN_KEYWORD___FILE__:
19401 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19402 break;
19403 case PM_TOKEN_KEYWORD___LINE__:
19404 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19405 break;
19406 case PM_TOKEN_KEYWORD___ENCODING__:
19407 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19408 break;
19409 default:
19410 break;
19411 }
19412
19413 name = parse_method_definition_name(parser);
19414 } else {
19415 if (!valid_name) {
19416 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19417 }
19418
19419 name = identifier;
19420 }
19421 break;
19422 }
19423 case PM_TOKEN_PARENTHESIS_LEFT: {
19424 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19425 // the inner expression of this parenthesis should not be
19426 // processed under this context. Thus, the context is popped
19427 // here.
19428 context_pop(parser);
19429 parser_lex(parser);
19430
19431 pm_token_t lparen = parser->previous;
19432 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19433
19434 accept1(parser, PM_TOKEN_NEWLINE);
19435 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19436 pm_token_t rparen = parser->previous;
19437
19438 lex_state_set(parser, PM_LEX_STATE_FNAME);
19439 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19440
19441 operator = parser->previous;
19442 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
19443
19444 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19445 // reason as described above.
19446 pm_parser_scope_push(parser, true);
19447 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19448 name = parse_method_definition_name(parser);
19449 break;
19450 }
19451 default:
19452 pm_parser_scope_push(parser, true);
19453 name = parse_method_definition_name(parser);
19454 break;
19455 }
19456
19457 pm_token_t lparen;
19458 pm_token_t rparen;
19459 pm_parameters_node_t *params;
19460
19461 switch (parser->current.type) {
19462 case PM_TOKEN_PARENTHESIS_LEFT: {
19463 parser_lex(parser);
19464 lparen = parser->previous;
19465
19466 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19467 params = NULL;
19468 } else {
19469 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19470 }
19471
19472 lex_state_set(parser, PM_LEX_STATE_BEG);
19473 parser->command_start = true;
19474
19475 context_pop(parser);
19476 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19477 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19478 parser->previous.start = parser->previous.end;
19479 parser->previous.type = PM_TOKEN_MISSING;
19480 }
19481
19482 rparen = parser->previous;
19483 break;
19484 }
19485 case PM_CASE_PARAMETER: {
19486 // If we're about to lex a label, we need to add the label
19487 // state to make sure the next newline is ignored.
19488 if (parser->current.type == PM_TOKEN_LABEL) {
19489 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19490 }
19491
19492 lparen = not_provided(parser);
19493 rparen = not_provided(parser);
19494 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19495
19496 context_pop(parser);
19497 break;
19498 }
19499 default: {
19500 lparen = not_provided(parser);
19501 rparen = not_provided(parser);
19502 params = NULL;
19503
19504 context_pop(parser);
19505 break;
19506 }
19507 }
19508
19509 pm_node_t *statements = NULL;
19510 pm_token_t equal;
19511 pm_token_t end_keyword;
19512
19513 if (accept1(parser, PM_TOKEN_EQUAL)) {
19514 if (token_is_setter_name(&name)) {
19515 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19516 }
19517 equal = parser->previous;
19518
19519 context_push(parser, PM_CONTEXT_DEF);
19520 pm_do_loop_stack_push(parser, false);
19521 statements = (pm_node_t *) pm_statements_node_create(parser);
19522
19523 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19524
19525 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19526 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19527
19528 pm_token_t rescue_keyword = parser->previous;
19529 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19530 context_pop(parser);
19531
19532 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19533 }
19534
19535 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19536 pm_do_loop_stack_pop(parser);
19537 context_pop(parser);
19538 end_keyword = not_provided(parser);
19539 } else {
19540 equal = not_provided(parser);
19541
19542 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19543 lex_state_set(parser, PM_LEX_STATE_BEG);
19544 parser->command_start = true;
19545 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19546 } else {
19547 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19548 }
19549
19550 pm_accepts_block_stack_push(parser, true);
19551 pm_do_loop_stack_push(parser, false);
19552
19553 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19554 pm_accepts_block_stack_push(parser, true);
19555 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19556 pm_accepts_block_stack_pop(parser);
19557 }
19558
19559 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19560 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19561 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19562 } else {
19563 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19564 }
19565
19566 pm_accepts_block_stack_pop(parser);
19567 pm_do_loop_stack_pop(parser);
19568
19569 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19570 end_keyword = parser->previous;
19571 }
19572
19573 pm_constant_id_list_t locals;
19574 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19575 pm_parser_scope_pop(parser);
19576
19582 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19583
19584 flush_block_exits(parser, previous_block_exits);
19585 pm_node_list_free(&current_block_exits);
19586
19587 return (pm_node_t *) pm_def_node_create(
19588 parser,
19589 name_id,
19590 &name,
19591 receiver,
19592 params,
19593 statements,
19594 &locals,
19595 &def_keyword,
19596 &operator,
19597 &lparen,
19598 &rparen,
19599 &equal,
19600 &end_keyword
19601 );
19602 }
19603 case PM_TOKEN_KEYWORD_DEFINED: {
19604 parser_lex(parser);
19605 pm_token_t keyword = parser->previous;
19606
19607 pm_token_t lparen;
19608 pm_token_t rparen;
19609 pm_node_t *expression;
19610
19611 context_push(parser, PM_CONTEXT_DEFINED);
19612 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19613
19614 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19615 lparen = parser->previous;
19616
19617 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19618 expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19619 lparen = not_provided(parser);
19620 rparen = not_provided(parser);
19621 } else {
19622 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19623
19624 if (parser->recovering) {
19625 rparen = not_provided(parser);
19626 } else {
19627 accept1(parser, PM_TOKEN_NEWLINE);
19628 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19629 rparen = parser->previous;
19630 }
19631 }
19632 } else {
19633 lparen = not_provided(parser);
19634 rparen = not_provided(parser);
19635 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19636 }
19637
19638 context_pop(parser);
19639 return (pm_node_t *) pm_defined_node_create(
19640 parser,
19641 &lparen,
19642 expression,
19643 &rparen,
19644 &PM_LOCATION_TOKEN_VALUE(&keyword)
19645 );
19646 }
19647 case PM_TOKEN_KEYWORD_END_UPCASE: {
19648 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19649 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19650 }
19651
19652 parser_lex(parser);
19653 pm_token_t keyword = parser->previous;
19654
19655 if (context_def_p(parser)) {
19656 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19657 }
19658
19659 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19660 pm_token_t opening = parser->previous;
19661 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19662
19663 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19664 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19665 }
19666 case PM_TOKEN_KEYWORD_FALSE:
19667 parser_lex(parser);
19668 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19669 case PM_TOKEN_KEYWORD_FOR: {
19670 size_t opening_newline_index = token_newline_index(parser);
19671 parser_lex(parser);
19672
19673 pm_token_t for_keyword = parser->previous;
19674 pm_node_t *index;
19675
19676 context_push(parser, PM_CONTEXT_FOR_INDEX);
19677
19678 // First, parse out the first index expression.
19679 if (accept1(parser, PM_TOKEN_USTAR)) {
19680 pm_token_t star_operator = parser->previous;
19681 pm_node_t *name = NULL;
19682
19683 if (token_begins_expression_p(parser->current.type)) {
19684 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19685 }
19686
19687 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19688 } else if (token_begins_expression_p(parser->current.type)) {
19689 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19690 } else {
19691 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19692 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19693 }
19694
19695 // Now, if there are multiple index expressions, parse them out.
19696 if (match1(parser, PM_TOKEN_COMMA)) {
19697 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19698 } else {
19699 index = parse_target(parser, index, false, false);
19700 }
19701
19702 context_pop(parser);
19703 pm_do_loop_stack_push(parser, true);
19704
19705 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19706 pm_token_t in_keyword = parser->previous;
19707
19708 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19709 pm_do_loop_stack_pop(parser);
19710
19711 pm_token_t do_keyword;
19712 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19713 do_keyword = parser->previous;
19714 } else {
19715 do_keyword = not_provided(parser);
19716 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19717 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19718 }
19719 }
19720
19721 pm_statements_node_t *statements = NULL;
19722 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19723 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19724 }
19725
19726 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19727 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19728
19729 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19730 }
19731 case PM_TOKEN_KEYWORD_IF:
19732 if (parser_end_of_line_p(parser)) {
19733 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19734 }
19735
19736 size_t opening_newline_index = token_newline_index(parser);
19737 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19738 parser_lex(parser);
19739
19740 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19741 case PM_TOKEN_KEYWORD_UNDEF: {
19742 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19743 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19744 }
19745
19746 parser_lex(parser);
19747 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19748 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19749
19750 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19751 pm_node_destroy(parser, name);
19752 } else {
19753 pm_undef_node_append(undef, name);
19754
19755 while (match1(parser, PM_TOKEN_COMMA)) {
19756 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19757 parser_lex(parser);
19758 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19759
19760 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19761 pm_node_destroy(parser, name);
19762 break;
19763 }
19764
19765 pm_undef_node_append(undef, name);
19766 }
19767 }
19768
19769 return (pm_node_t *) undef;
19770 }
19771 case PM_TOKEN_KEYWORD_NOT: {
19772 parser_lex(parser);
19773
19774 pm_token_t message = parser->previous;
19775 pm_arguments_t arguments = { 0 };
19776 pm_node_t *receiver = NULL;
19777
19778 // If we do not accept a command call, then we also do not accept a
19779 // not without parentheses. In this case we need to reject this
19780 // syntax.
19781 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19782 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19783 pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19784 } else {
19785 accept1(parser, PM_TOKEN_NEWLINE);
19786 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19787 }
19788
19789 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
19790 }
19791
19792 accept1(parser, PM_TOKEN_NEWLINE);
19793
19794 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19795 pm_token_t lparen = parser->previous;
19796
19797 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19798 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
19799 } else {
19800 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19801 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19802
19803 if (!parser->recovering) {
19804 accept1(parser, PM_TOKEN_NEWLINE);
19805 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19806 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19807 }
19808 }
19809 } else {
19810 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19811 }
19812
19813 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19814 }
19815 case PM_TOKEN_KEYWORD_UNLESS: {
19816 size_t opening_newline_index = token_newline_index(parser);
19817 parser_lex(parser);
19818
19819 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19820 }
19821 case PM_TOKEN_KEYWORD_MODULE: {
19822 pm_node_list_t current_block_exits = { 0 };
19823 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19824
19825 size_t opening_newline_index = token_newline_index(parser);
19826 parser_lex(parser);
19827 pm_token_t module_keyword = parser->previous;
19828
19829 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19830 pm_token_t name;
19831
19832 // If we can recover from a syntax error that occurred while parsing
19833 // the name of the module, then we'll handle that here.
19834 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19835 pop_block_exits(parser, previous_block_exits);
19836 pm_node_list_free(&current_block_exits);
19837
19838 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19839 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19840 }
19841
19842 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19843 pm_token_t double_colon = parser->previous;
19844
19845 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19846 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19847 }
19848
19849 // Here we retrieve the name of the module. If it wasn't a constant,
19850 // then it's possible that `module foo` was passed, which is a
19851 // syntax error. We handle that here as well.
19852 name = parser->previous;
19853 if (name.type != PM_TOKEN_CONSTANT) {
19854 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19855 }
19856
19857 pm_parser_scope_push(parser, true);
19858 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19859 pm_node_t *statements = NULL;
19860
19861 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19862 pm_accepts_block_stack_push(parser, true);
19863 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19864 pm_accepts_block_stack_pop(parser);
19865 }
19866
19867 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19868 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19869 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19870 } else {
19871 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19872 }
19873
19874 pm_constant_id_list_t locals;
19875 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19876
19877 pm_parser_scope_pop(parser);
19878 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19879
19880 if (context_def_p(parser)) {
19881 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19882 }
19883
19884 pop_block_exits(parser, previous_block_exits);
19885 pm_node_list_free(&current_block_exits);
19886
19887 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19888 }
19889 case PM_TOKEN_KEYWORD_NIL:
19890 parser_lex(parser);
19891 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19892 case PM_TOKEN_KEYWORD_REDO: {
19893 parser_lex(parser);
19894
19895 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19896 if (!parser->partial_script) parse_block_exit(parser, node);
19897
19898 return node;
19899 }
19900 case PM_TOKEN_KEYWORD_RETRY: {
19901 parser_lex(parser);
19902
19903 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19904 parse_retry(parser, node);
19905
19906 return node;
19907 }
19908 case PM_TOKEN_KEYWORD_SELF:
19909 parser_lex(parser);
19910 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19911 case PM_TOKEN_KEYWORD_TRUE:
19912 parser_lex(parser);
19913 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19914 case PM_TOKEN_KEYWORD_UNTIL: {
19915 size_t opening_newline_index = token_newline_index(parser);
19916
19917 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19918 pm_do_loop_stack_push(parser, true);
19919
19920 parser_lex(parser);
19921 pm_token_t keyword = parser->previous;
19922 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19923
19924 pm_do_loop_stack_pop(parser);
19925 context_pop(parser);
19926
19927 pm_token_t do_keyword;
19928 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19929 do_keyword = parser->previous;
19930 } else {
19931 do_keyword = not_provided(parser);
19932 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19933 }
19934
19935 pm_statements_node_t *statements = NULL;
19936 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19937 pm_accepts_block_stack_push(parser, true);
19938 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19939 pm_accepts_block_stack_pop(parser);
19940 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19941 }
19942
19943 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19944 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19945
19946 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19947 }
19948 case PM_TOKEN_KEYWORD_WHILE: {
19949 size_t opening_newline_index = token_newline_index(parser);
19950
19951 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19952 pm_do_loop_stack_push(parser, true);
19953
19954 parser_lex(parser);
19955 pm_token_t keyword = parser->previous;
19956 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19957
19958 pm_do_loop_stack_pop(parser);
19959 context_pop(parser);
19960
19961 pm_token_t do_keyword;
19962 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19963 do_keyword = parser->previous;
19964 } else {
19965 do_keyword = not_provided(parser);
19966 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19967 }
19968
19969 pm_statements_node_t *statements = NULL;
19970 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19971 pm_accepts_block_stack_push(parser, true);
19972 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19973 pm_accepts_block_stack_pop(parser);
19974 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19975 }
19976
19977 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19978 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19979
19980 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19981 }
19982 case PM_TOKEN_PERCENT_LOWER_I: {
19983 parser_lex(parser);
19984 pm_token_t opening = parser->previous;
19985 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19986
19987 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19988 accept1(parser, PM_TOKEN_WORDS_SEP);
19989 if (match1(parser, PM_TOKEN_STRING_END)) break;
19990
19991 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19992 pm_token_t opening = not_provided(parser);
19993 pm_token_t closing = not_provided(parser);
19994 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19995 }
19996
19997 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19998 }
19999
20000 pm_token_t closing = parser->current;
20001 if (match1(parser, PM_TOKEN_EOF)) {
20002 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20003 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20004 } else {
20005 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20006 }
20007 pm_array_node_close_set(array, &closing);
20008
20009 return (pm_node_t *) array;
20010 }
20011 case PM_TOKEN_PERCENT_UPPER_I: {
20012 parser_lex(parser);
20013 pm_token_t opening = parser->previous;
20014 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20015
20016 // This is the current node that we are parsing that will be added to the
20017 // list of elements.
20018 pm_node_t *current = NULL;
20019
20020 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20021 switch (parser->current.type) {
20022 case PM_TOKEN_WORDS_SEP: {
20023 if (current == NULL) {
20024 // If we hit a separator before we have any content, then we don't
20025 // need to do anything.
20026 } else {
20027 // If we hit a separator after we've hit content, then we need to
20028 // append that content to the list and reset the current node.
20029 pm_array_node_elements_append(array, current);
20030 current = NULL;
20031 }
20032
20033 parser_lex(parser);
20034 break;
20035 }
20036 case PM_TOKEN_STRING_CONTENT: {
20037 pm_token_t opening = not_provided(parser);
20038 pm_token_t closing = not_provided(parser);
20039
20040 if (current == NULL) {
20041 // If we hit content and the current node is NULL, then this is
20042 // the first string content we've seen. In that case we're going
20043 // to create a new string node and set that to the current.
20044 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
20045 parser_lex(parser);
20046 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20047 // If we hit string content and the current node is an
20048 // interpolated string, then we need to append the string content
20049 // to the list of child nodes.
20050 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20051 parser_lex(parser);
20052
20053 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
20054 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20055 // If we hit string content and the current node is a symbol node,
20056 // then we need to convert the current node into an interpolated
20057 // string and add the string content to the list of child nodes.
20058 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20059 pm_token_t bounds = not_provided(parser);
20060
20061 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
20062 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
20063 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
20064 parser_lex(parser);
20065
20066 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20067 pm_interpolated_symbol_node_append(interpolated, first_string);
20068 pm_interpolated_symbol_node_append(interpolated, second_string);
20069
20070 xfree(current);
20071 current = (pm_node_t *) interpolated;
20072 } else {
20073 assert(false && "unreachable");
20074 }
20075
20076 break;
20077 }
20078 case PM_TOKEN_EMBVAR: {
20079 bool start_location_set = false;
20080 if (current == NULL) {
20081 // If we hit an embedded variable and the current node is NULL,
20082 // then this is the start of a new string. We'll set the current
20083 // node to a new interpolated string.
20084 pm_token_t opening = not_provided(parser);
20085 pm_token_t closing = not_provided(parser);
20086 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20087 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20088 // If we hit an embedded variable and the current node is a string
20089 // node, then we'll convert the current into an interpolated
20090 // string and add the string node to the list of parts.
20091 pm_token_t opening = not_provided(parser);
20092 pm_token_t closing = not_provided(parser);
20093 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20094
20095 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20096 pm_interpolated_symbol_node_append(interpolated, current);
20097 interpolated->base.location.start = current->location.start;
20098 start_location_set = true;
20099 current = (pm_node_t *) interpolated;
20100 } else {
20101 // If we hit an embedded variable and the current node is an
20102 // interpolated string, then we'll just add the embedded variable.
20103 }
20104
20105 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20106 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20107 if (!start_location_set) {
20108 current->location.start = part->location.start;
20109 }
20110 break;
20111 }
20112 case PM_TOKEN_EMBEXPR_BEGIN: {
20113 bool start_location_set = false;
20114 if (current == NULL) {
20115 // If we hit an embedded expression and the current node is NULL,
20116 // then this is the start of a new string. We'll set the current
20117 // node to a new interpolated string.
20118 pm_token_t opening = not_provided(parser);
20119 pm_token_t closing = not_provided(parser);
20120 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20121 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20122 // If we hit an embedded expression and the current node is a
20123 // string node, then we'll convert the current into an
20124 // interpolated string and add the string node to the list of
20125 // parts.
20126 pm_token_t opening = not_provided(parser);
20127 pm_token_t closing = not_provided(parser);
20128 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20129
20130 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20131 pm_interpolated_symbol_node_append(interpolated, current);
20132 interpolated->base.location.start = current->location.start;
20133 start_location_set = true;
20134 current = (pm_node_t *) interpolated;
20135 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20136 // If we hit an embedded expression and the current node is an
20137 // interpolated string, then we'll just continue on.
20138 } else {
20139 assert(false && "unreachable");
20140 }
20141
20142 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20143 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20144 if (!start_location_set) {
20145 current->location.start = part->location.start;
20146 }
20147 break;
20148 }
20149 default:
20150 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20151 parser_lex(parser);
20152 break;
20153 }
20154 }
20155
20156 // If we have a current node, then we need to append it to the list.
20157 if (current) {
20158 pm_array_node_elements_append(array, current);
20159 }
20160
20161 pm_token_t closing = parser->current;
20162 if (match1(parser, PM_TOKEN_EOF)) {
20163 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20164 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20165 } else {
20166 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20167 }
20168 pm_array_node_close_set(array, &closing);
20169
20170 return (pm_node_t *) array;
20171 }
20172 case PM_TOKEN_PERCENT_LOWER_W: {
20173 parser_lex(parser);
20174 pm_token_t opening = parser->previous;
20175 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20176
20177 // skip all leading whitespaces
20178 accept1(parser, PM_TOKEN_WORDS_SEP);
20179
20180 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20181 accept1(parser, PM_TOKEN_WORDS_SEP);
20182 if (match1(parser, PM_TOKEN_STRING_END)) break;
20183
20184 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20185 pm_token_t opening = not_provided(parser);
20186 pm_token_t closing = not_provided(parser);
20187
20188 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20189 pm_array_node_elements_append(array, string);
20190 }
20191
20192 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20193 }
20194
20195 pm_token_t closing = parser->current;
20196 if (match1(parser, PM_TOKEN_EOF)) {
20197 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20198 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20199 } else {
20200 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20201 }
20202
20203 pm_array_node_close_set(array, &closing);
20204 return (pm_node_t *) array;
20205 }
20206 case PM_TOKEN_PERCENT_UPPER_W: {
20207 parser_lex(parser);
20208 pm_token_t opening = parser->previous;
20209 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20210
20211 // This is the current node that we are parsing that will be added
20212 // to the list of elements.
20213 pm_node_t *current = NULL;
20214
20215 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20216 switch (parser->current.type) {
20217 case PM_TOKEN_WORDS_SEP: {
20218 // Reset the explicit encoding if we hit a separator
20219 // since each element can have its own encoding.
20220 parser->explicit_encoding = NULL;
20221
20222 if (current == NULL) {
20223 // If we hit a separator before we have any content,
20224 // then we don't need to do anything.
20225 } else {
20226 // If we hit a separator after we've hit content,
20227 // then we need to append that content to the list
20228 // and reset the current node.
20229 pm_array_node_elements_append(array, current);
20230 current = NULL;
20231 }
20232
20233 parser_lex(parser);
20234 break;
20235 }
20236 case PM_TOKEN_STRING_CONTENT: {
20237 pm_token_t opening = not_provided(parser);
20238 pm_token_t closing = not_provided(parser);
20239
20240 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20241 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20242 parser_lex(parser);
20243
20244 if (current == NULL) {
20245 // If we hit content and the current node is NULL,
20246 // then this is the first string content we've seen.
20247 // In that case we're going to create a new string
20248 // node and set that to the current.
20249 current = string;
20250 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20251 // If we hit string content and the current node is
20252 // an interpolated string, then we need to append
20253 // the string content to the list of child nodes.
20254 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20255 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20256 // If we hit string content and the current node is
20257 // a string node, then we need to convert the
20258 // current node into an interpolated string and add
20259 // the string content to the list of child nodes.
20260 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20261 pm_interpolated_string_node_append(interpolated, current);
20262 pm_interpolated_string_node_append(interpolated, string);
20263 current = (pm_node_t *) interpolated;
20264 } else {
20265 assert(false && "unreachable");
20266 }
20267
20268 break;
20269 }
20270 case PM_TOKEN_EMBVAR: {
20271 if (current == NULL) {
20272 // If we hit an embedded variable and the current
20273 // node is NULL, then this is the start of a new
20274 // string. We'll set the current node to a new
20275 // interpolated string.
20276 pm_token_t opening = not_provided(parser);
20277 pm_token_t closing = not_provided(parser);
20278 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20279 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20280 // If we hit an embedded variable and the current
20281 // node is a string node, then we'll convert the
20282 // current into an interpolated string and add the
20283 // string node to the list of parts.
20284 pm_token_t opening = not_provided(parser);
20285 pm_token_t closing = not_provided(parser);
20286 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20287 pm_interpolated_string_node_append(interpolated, current);
20288 current = (pm_node_t *) interpolated;
20289 } else {
20290 // If we hit an embedded variable and the current
20291 // node is an interpolated string, then we'll just
20292 // add the embedded variable.
20293 }
20294
20295 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20296 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20297 break;
20298 }
20299 case PM_TOKEN_EMBEXPR_BEGIN: {
20300 if (current == NULL) {
20301 // If we hit an embedded expression and the current
20302 // node is NULL, then this is the start of a new
20303 // string. We'll set the current node to a new
20304 // interpolated string.
20305 pm_token_t opening = not_provided(parser);
20306 pm_token_t closing = not_provided(parser);
20307 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20308 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20309 // If we hit an embedded expression and the current
20310 // node is a string node, then we'll convert the
20311 // current into an interpolated string and add the
20312 // string node to the list of parts.
20313 pm_token_t opening = not_provided(parser);
20314 pm_token_t closing = not_provided(parser);
20315 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20316 pm_interpolated_string_node_append(interpolated, current);
20317 current = (pm_node_t *) interpolated;
20318 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20319 // If we hit an embedded expression and the current
20320 // node is an interpolated string, then we'll just
20321 // continue on.
20322 } else {
20323 assert(false && "unreachable");
20324 }
20325
20326 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20327 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20328 break;
20329 }
20330 default:
20331 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20332 parser_lex(parser);
20333 break;
20334 }
20335 }
20336
20337 // If we have a current node, then we need to append it to the list.
20338 if (current) {
20339 pm_array_node_elements_append(array, current);
20340 }
20341
20342 pm_token_t closing = parser->current;
20343 if (match1(parser, PM_TOKEN_EOF)) {
20344 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20345 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20346 } else {
20347 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20348 }
20349
20350 pm_array_node_close_set(array, &closing);
20351 return (pm_node_t *) array;
20352 }
20353 case PM_TOKEN_REGEXP_BEGIN: {
20354 pm_token_t opening = parser->current;
20355 parser_lex(parser);
20356
20357 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20358 // If we get here, then we have an end immediately after a start. In
20359 // that case we'll create an empty content token and return an
20360 // uninterpolated regular expression.
20361 pm_token_t content = (pm_token_t) {
20362 .type = PM_TOKEN_STRING_CONTENT,
20363 .start = parser->previous.end,
20364 .end = parser->previous.end
20365 };
20366
20367 parser_lex(parser);
20368
20369 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20370 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
20371
20372 return node;
20373 }
20374
20376
20377 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20378 // In this case we've hit string content so we know the regular
20379 // expression at least has something in it. We'll need to check if the
20380 // following token is the end (in which case we can return a plain
20381 // regular expression) or if it's not then it has interpolation.
20382 pm_string_t unescaped = parser->current_string;
20383 pm_token_t content = parser->current;
20384 bool ascii_only = parser->current_regular_expression_ascii_only;
20385 parser_lex(parser);
20386
20387 // If we hit an end, then we can create a regular expression
20388 // node without interpolation, which can be represented more
20389 // succinctly and more easily compiled.
20390 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20391 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20392
20393 // If we're not immediately followed by a =~, then we want
20394 // to parse all of the errors at this point. If it is
20395 // followed by a =~, then it will get parsed higher up while
20396 // parsing the named captures as well.
20397 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20398 parse_regular_expression_errors(parser, node);
20399 }
20400
20401 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20402 return (pm_node_t *) node;
20403 }
20404
20405 // If we get here, then we have interpolation so we'll need to create
20406 // a regular expression node with interpolation.
20407 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20408
20409 pm_token_t opening = not_provided(parser);
20410 pm_token_t closing = not_provided(parser);
20411 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20412
20413 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20414 // This is extremely strange, but the first string part of a
20415 // regular expression will always be tagged as binary if we
20416 // are in a US-ASCII file, no matter its contents.
20417 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20418 }
20419
20420 pm_interpolated_regular_expression_node_append(interpolated, part);
20421 } else {
20422 // If the first part of the body of the regular expression is not a
20423 // string content, then we have interpolation and we need to create an
20424 // interpolated regular expression node.
20425 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20426 }
20427
20428 // Now that we're here and we have interpolation, we'll parse all of the
20429 // parts into the list.
20430 pm_node_t *part;
20431 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20432 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20433 pm_interpolated_regular_expression_node_append(interpolated, part);
20434 }
20435 }
20436
20437 pm_token_t closing = parser->current;
20438 if (match1(parser, PM_TOKEN_EOF)) {
20439 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20440 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20441 } else {
20442 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20443 }
20444
20445 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20446 return (pm_node_t *) interpolated;
20447 }
20448 case PM_TOKEN_BACKTICK:
20449 case PM_TOKEN_PERCENT_LOWER_X: {
20450 parser_lex(parser);
20451 pm_token_t opening = parser->previous;
20452
20453 // When we get here, we don't know if this string is going to have
20454 // interpolation or not, even though it is allowed. Still, we want to be
20455 // able to return a string node without interpolation if we can since
20456 // it'll be faster.
20457 if (match1(parser, PM_TOKEN_STRING_END)) {
20458 // If we get here, then we have an end immediately after a start. In
20459 // that case we'll create an empty content token and return an
20460 // uninterpolated string.
20461 pm_token_t content = (pm_token_t) {
20462 .type = PM_TOKEN_STRING_CONTENT,
20463 .start = parser->previous.end,
20464 .end = parser->previous.end
20465 };
20466
20467 parser_lex(parser);
20468 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20469 }
20470
20472
20473 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20474 // In this case we've hit string content so we know the string
20475 // at least has something in it. We'll need to check if the
20476 // following token is the end (in which case we can return a
20477 // plain string) or if it's not then it has interpolation.
20478 pm_string_t unescaped = parser->current_string;
20479 pm_token_t content = parser->current;
20480 parser_lex(parser);
20481
20482 if (match1(parser, PM_TOKEN_STRING_END)) {
20483 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20484 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20485 parser_lex(parser);
20486 return node;
20487 }
20488
20489 // If we get here, then we have interpolation so we'll need to
20490 // create a string node with interpolation.
20491 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20492
20493 pm_token_t opening = not_provided(parser);
20494 pm_token_t closing = not_provided(parser);
20495
20496 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20497 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20498
20499 pm_interpolated_xstring_node_append(node, part);
20500 } else {
20501 // If the first part of the body of the string is not a string
20502 // content, then we have interpolation and we need to create an
20503 // interpolated string node.
20504 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20505 }
20506
20507 pm_node_t *part;
20508 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20509 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20510 pm_interpolated_xstring_node_append(node, part);
20511 }
20512 }
20513
20514 pm_token_t closing = parser->current;
20515 if (match1(parser, PM_TOKEN_EOF)) {
20516 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20517 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20518 } else {
20519 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20520 }
20521 pm_interpolated_xstring_node_closing_set(node, &closing);
20522
20523 return (pm_node_t *) node;
20524 }
20525 case PM_TOKEN_USTAR: {
20526 parser_lex(parser);
20527
20528 // * operators at the beginning of expressions are only valid in the
20529 // context of a multiple assignment. We enforce that here. We'll
20530 // still lex past it though and create a missing node place.
20531 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20532 pm_parser_err_prefix(parser, diag_id);
20533 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20534 }
20535
20536 pm_token_t operator = parser->previous;
20537 pm_node_t *name = NULL;
20538
20539 if (token_begins_expression_p(parser->current.type)) {
20540 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20541 }
20542
20543 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20544
20545 if (match1(parser, PM_TOKEN_COMMA)) {
20546 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20547 } else {
20548 return parse_target_validate(parser, splat, true);
20549 }
20550 }
20551 case PM_TOKEN_BANG: {
20552 if (binding_power > PM_BINDING_POWER_UNARY) {
20553 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20554 }
20555
20556 parser_lex(parser);
20557
20558 pm_token_t operator = parser->previous;
20559 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20560 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20561
20562 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20563 return (pm_node_t *) node;
20564 }
20565 case PM_TOKEN_TILDE: {
20566 if (binding_power > PM_BINDING_POWER_UNARY) {
20567 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20568 }
20569 parser_lex(parser);
20570
20571 pm_token_t operator = parser->previous;
20572 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20573 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20574
20575 return (pm_node_t *) node;
20576 }
20577 case PM_TOKEN_UMINUS: {
20578 if (binding_power > PM_BINDING_POWER_UNARY) {
20579 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20580 }
20581 parser_lex(parser);
20582
20583 pm_token_t operator = parser->previous;
20584 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20585 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20586
20587 return (pm_node_t *) node;
20588 }
20589 case PM_TOKEN_UMINUS_NUM: {
20590 parser_lex(parser);
20591
20592 pm_token_t operator = parser->previous;
20593 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20594
20595 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20596 pm_token_t exponent_operator = parser->previous;
20597 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20598 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20599 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20600 } else {
20601 switch (PM_NODE_TYPE(node)) {
20602 case PM_INTEGER_NODE:
20603 case PM_FLOAT_NODE:
20604 case PM_RATIONAL_NODE:
20605 case PM_IMAGINARY_NODE:
20606 parse_negative_numeric(node);
20607 break;
20608 default:
20609 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20610 break;
20611 }
20612 }
20613
20614 return node;
20615 }
20616 case PM_TOKEN_MINUS_GREATER: {
20617 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20619
20620 size_t opening_newline_index = token_newline_index(parser);
20621 pm_accepts_block_stack_push(parser, true);
20622 parser_lex(parser);
20623
20624 pm_token_t operator = parser->previous;
20625 pm_parser_scope_push(parser, false);
20626
20627 pm_block_parameters_node_t *block_parameters;
20628
20629 switch (parser->current.type) {
20630 case PM_TOKEN_PARENTHESIS_LEFT: {
20631 pm_token_t opening = parser->current;
20632 parser_lex(parser);
20633
20634 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20635 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20636 } else {
20637 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20638 }
20639
20640 accept1(parser, PM_TOKEN_NEWLINE);
20641 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20642
20643 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20644 break;
20645 }
20646 case PM_CASE_PARAMETER: {
20647 pm_accepts_block_stack_push(parser, false);
20648 pm_token_t opening = not_provided(parser);
20649 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20650 pm_accepts_block_stack_pop(parser);
20651 break;
20652 }
20653 default: {
20654 block_parameters = NULL;
20655 break;
20656 }
20657 }
20658
20659 pm_token_t opening;
20660 pm_node_t *body = NULL;
20661 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20662
20663 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20664 opening = parser->previous;
20665
20666 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20667 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20668 }
20669
20670 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20671 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20672 } else {
20673 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20674 opening = parser->previous;
20675
20676 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20677 pm_accepts_block_stack_push(parser, true);
20678 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20679 pm_accepts_block_stack_pop(parser);
20680 }
20681
20682 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20683 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20684 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20685 } else {
20686 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20687 }
20688
20689 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20690 }
20691
20692 pm_constant_id_list_t locals;
20693 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20694 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20695
20696 pm_parser_scope_pop(parser);
20697 pm_accepts_block_stack_pop(parser);
20698
20699 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20700 }
20701 case PM_TOKEN_UPLUS: {
20702 if (binding_power > PM_BINDING_POWER_UNARY) {
20703 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20704 }
20705 parser_lex(parser);
20706
20707 pm_token_t operator = parser->previous;
20708 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20709 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20710
20711 return (pm_node_t *) node;
20712 }
20713 case PM_TOKEN_STRING_BEGIN:
20714 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20715 case PM_TOKEN_SYMBOL_BEGIN: {
20716 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20717 parser_lex(parser);
20718
20719 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20720 }
20721 default: {
20722 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20723
20724 if (recoverable != PM_CONTEXT_NONE) {
20725 parser->recovering = true;
20726
20727 // If the given error is not the generic one, then we'll add it
20728 // here because it will provide more context in addition to the
20729 // recoverable error that we will also add.
20730 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20731 pm_parser_err_prefix(parser, diag_id);
20732 }
20733
20734 // If we get here, then we are assuming this token is closing a
20735 // parent context, so we'll indicate that to the user so that
20736 // they know how we behaved.
20737 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20738 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20739 // We're going to make a special case here, because "cannot
20740 // parse expression" is pretty generic, and we know here that we
20741 // have an unexpected token.
20742 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20743 } else {
20744 pm_parser_err_prefix(parser, diag_id);
20745 }
20746
20747 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20748 }
20749 }
20750}
20751
20761static pm_node_t *
20762parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20763 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20764
20765 // Contradicting binding powers, the right-hand-side value of the assignment
20766 // allows the `rescue` modifier.
20767 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20768 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20769
20770 pm_token_t rescue = parser->current;
20771 parser_lex(parser);
20772
20773 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20774 context_pop(parser);
20775
20776 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20777 }
20778
20779 return value;
20780}
20781
20786static void
20787parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20788 switch (PM_NODE_TYPE(node)) {
20789 case PM_BEGIN_NODE: {
20790 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20791 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20792 break;
20793 }
20794 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20796 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20797 break;
20798 }
20799 case PM_PARENTHESES_NODE: {
20800 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20801 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20802 break;
20803 }
20804 case PM_STATEMENTS_NODE: {
20805 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20806 const pm_node_t *statement;
20807
20808 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20809 parse_assignment_value_local(parser, statement);
20810 }
20811 break;
20812 }
20813 default:
20814 break;
20815 }
20816}
20817
20830static pm_node_t *
20831parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20832 bool permitted = true;
20833 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20834
20835 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20836 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20837
20838 parse_assignment_value_local(parser, value);
20839 bool single_value = true;
20840
20841 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20842 single_value = false;
20843
20844 pm_token_t opening = not_provided(parser);
20845 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20846
20847 pm_array_node_elements_append(array, value);
20848 value = (pm_node_t *) array;
20849
20850 while (accept1(parser, PM_TOKEN_COMMA)) {
20851 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20852
20853 pm_array_node_elements_append(array, element);
20854 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20855
20856 parse_assignment_value_local(parser, element);
20857 }
20858 }
20859
20860 // Contradicting binding powers, the right-hand-side value of the assignment
20861 // allows the `rescue` modifier.
20862 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20863 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20864
20865 pm_token_t rescue = parser->current;
20866 parser_lex(parser);
20867
20868 bool accepts_command_call_inner = false;
20869
20870 // RHS can accept command call iff the value is a call with arguments
20871 // but without parenthesis.
20872 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20873 pm_call_node_t *call_node = (pm_call_node_t *) value;
20874 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20875 accepts_command_call_inner = true;
20876 }
20877 }
20878
20879 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20880 context_pop(parser);
20881
20882 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20883 }
20884
20885 return value;
20886}
20887
20895static void
20896parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20897 if (call_node->arguments != NULL) {
20898 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20899 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20900 call_node->arguments = NULL;
20901 }
20902
20903 if (call_node->block != NULL) {
20904 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20905 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20906 call_node->block = NULL;
20907 }
20908}
20909
20934
20935static inline const uint8_t *
20936pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20937 cursor++;
20938
20939 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20940 uint8_t value = escape_hexadecimal_digit(*cursor);
20941 cursor++;
20942
20943 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20944 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20945 cursor++;
20946 }
20947
20948 pm_buffer_append_byte(unescaped, value);
20949 } else {
20950 pm_buffer_append_string(unescaped, "\\x", 2);
20951 }
20952
20953 return cursor;
20954}
20955
20956static inline const uint8_t *
20957pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20958 uint8_t value = (uint8_t) (*cursor - '0');
20959 cursor++;
20960
20961 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20962 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20963 cursor++;
20964
20965 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20966 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20967 cursor++;
20968 }
20969 }
20970
20971 pm_buffer_append_byte(unescaped, value);
20972 return cursor;
20973}
20974
20975static inline const uint8_t *
20976pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20977 const uint8_t *start = cursor - 1;
20978 cursor++;
20979
20980 if (cursor >= end) {
20981 pm_buffer_append_string(unescaped, "\\u", 2);
20982 return cursor;
20983 }
20984
20985 if (*cursor != '{') {
20986 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20987 uint32_t value = escape_unicode(parser, cursor, length);
20988
20989 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20990 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20991 }
20992
20993 return cursor + length;
20994 }
20995
20996 cursor++;
20997 for (;;) {
20998 while (cursor < end && *cursor == ' ') cursor++;
20999
21000 if (cursor >= end) break;
21001 if (*cursor == '}') {
21002 cursor++;
21003 break;
21004 }
21005
21006 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
21007 uint32_t value = escape_unicode(parser, cursor, length);
21008
21009 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
21010 cursor += length;
21011 }
21012
21013 return cursor;
21014}
21015
21016static void
21017pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
21018 const uint8_t *end = source + length;
21019 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
21020
21021 for (;;) {
21022 if (++cursor >= end) {
21023 pm_buffer_append_byte(unescaped, '\\');
21024 return;
21025 }
21026
21027 switch (*cursor) {
21028 case 'x':
21029 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
21030 break;
21031 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
21032 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
21033 break;
21034 case 'u':
21035 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
21036 break;
21037 default:
21038 pm_buffer_append_byte(unescaped, '\\');
21039 break;
21040 }
21041
21042 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
21043 if (next_cursor == NULL) break;
21044
21045 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
21046 cursor = next_cursor;
21047 }
21048
21049 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
21050}
21051
21056static void
21057parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
21059
21060 pm_parser_t *parser = callback_data->parser;
21061 pm_call_node_t *call = callback_data->call;
21062 pm_constant_id_list_t *names = &callback_data->names;
21063
21064 const uint8_t *source = pm_string_source(capture);
21065 size_t length = pm_string_length(capture);
21066 pm_buffer_t unescaped = { 0 };
21067
21068 // First, we need to handle escapes within the name of the capture group.
21069 // This is because regular expressions have three different representations
21070 // in prism. The first is the plain source code. The second is the
21071 // representation that will be sent to the regular expression engine, which
21072 // is the value of the "unescaped" field. This is poorly named, because it
21073 // actually still contains escapes, just a subset of them that the regular
21074 // expression engine knows how to handle. The third representation is fully
21075 // unescaped, which is what we need.
21076 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
21077 if (PRISM_UNLIKELY(cursor != NULL)) {
21078 pm_named_capture_escape(parser, &unescaped, source, length, cursor);
21079 source = (const uint8_t *) pm_buffer_value(&unescaped);
21080 length = pm_buffer_length(&unescaped);
21081 }
21082
21083 pm_location_t location;
21084 pm_constant_id_t name;
21085
21086 // If the name of the capture group isn't a valid identifier, we do
21087 // not add it to the local table.
21088 if (!pm_slice_is_valid_local(parser, source, source + length)) {
21089 pm_buffer_free(&unescaped);
21090 return;
21091 }
21092
21093 if (callback_data->shared) {
21094 // If the unescaped string is a slice of the source, then we can
21095 // copy the names directly. The pointers will line up.
21096 location = (pm_location_t) { .start = source, .end = source + length };
21097 name = pm_parser_constant_id_location(parser, location.start, location.end);
21098 } else {
21099 // Otherwise, the name is a slice of the malloc-ed owned string,
21100 // in which case we need to copy it out into a new string.
21101 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
21102
21103 void *memory = xmalloc(length);
21104 if (memory == NULL) abort();
21105
21106 memcpy(memory, source, length);
21107 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
21108 }
21109
21110 // Add this name to the list of constants if it is valid, not duplicated,
21111 // and not a keyword.
21112 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
21113 pm_constant_id_list_append(names, name);
21114
21115 int depth;
21116 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
21117 // If the local is not already a local but it is a keyword, then we
21118 // do not want to add a capture for this.
21119 if (pm_local_is_keyword((const char *) source, length)) {
21120 pm_buffer_free(&unescaped);
21121 return;
21122 }
21123
21124 // If the identifier is not already a local, then we will add it to
21125 // the local table.
21126 pm_parser_local_add(parser, name, location.start, location.end, 0);
21127 }
21128
21129 // Here we lazily create the MatchWriteNode since we know we're
21130 // about to add a target.
21131 if (callback_data->match == NULL) {
21132 callback_data->match = pm_match_write_node_create(parser, call);
21133 }
21134
21135 // Next, create the local variable target and add it to the list of
21136 // targets for the match.
21137 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
21138 pm_node_list_append(&callback_data->match->targets, target);
21139 }
21140
21141 pm_buffer_free(&unescaped);
21142}
21143
21148static pm_node_t *
21149parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
21151 .parser = parser,
21152 .call = call,
21153 .names = { 0 },
21154 .shared = content->type == PM_STRING_SHARED
21155 };
21156
21158 .parser = parser,
21159 .start = call->receiver->location.start,
21160 .end = call->receiver->location.end,
21161 .shared = content->type == PM_STRING_SHARED
21162 };
21163
21164 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
21165 pm_constant_id_list_free(&callback_data.names);
21166
21167 if (callback_data.match != NULL) {
21168 return (pm_node_t *) callback_data.match;
21169 } else {
21170 return (pm_node_t *) call;
21171 }
21172}
21173
21174static inline pm_node_t *
21175parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
21176 pm_token_t token = parser->current;
21177
21178 switch (token.type) {
21179 case PM_TOKEN_EQUAL: {
21180 switch (PM_NODE_TYPE(node)) {
21181 case PM_CALL_NODE: {
21182 // If we have no arguments to the call node and we need this
21183 // to be a target then this is either a method call or a
21184 // local variable write. This _must_ happen before the value
21185 // is parsed because it could be referenced in the value.
21186 pm_call_node_t *call_node = (pm_call_node_t *) node;
21187 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21188 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
21189 }
21190 }
21192 case PM_CASE_WRITABLE: {
21193 parser_lex(parser);
21194 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21195
21196 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21197 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21198 }
21199
21200 return parse_write(parser, node, &token, value);
21201 }
21202 case PM_SPLAT_NODE: {
21203 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21204 pm_multi_target_node_targets_append(parser, multi_target, node);
21205
21206 parser_lex(parser);
21207 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21208 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
21209 }
21210 case PM_SOURCE_ENCODING_NODE:
21211 case PM_FALSE_NODE:
21212 case PM_SOURCE_FILE_NODE:
21213 case PM_SOURCE_LINE_NODE:
21214 case PM_NIL_NODE:
21215 case PM_SELF_NODE:
21216 case PM_TRUE_NODE: {
21217 // In these special cases, we have specific error messages
21218 // and we will replace them with local variable writes.
21219 parser_lex(parser);
21220 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21221 return parse_unwriteable_write(parser, node, &token, value);
21222 }
21223 default:
21224 // In this case we have an = sign, but we don't know what
21225 // it's for. We need to treat it as an error. We'll mark it
21226 // as an error and skip past it.
21227 parser_lex(parser);
21228 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21229 return node;
21230 }
21231 }
21232 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21233 switch (PM_NODE_TYPE(node)) {
21234 case PM_BACK_REFERENCE_READ_NODE:
21235 case PM_NUMBERED_REFERENCE_READ_NODE:
21236 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21238 case PM_GLOBAL_VARIABLE_READ_NODE: {
21239 parser_lex(parser);
21240
21241 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21242 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21243
21244 pm_node_destroy(parser, node);
21245 return result;
21246 }
21247 case PM_CLASS_VARIABLE_READ_NODE: {
21248 parser_lex(parser);
21249
21250 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21251 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21252
21253 pm_node_destroy(parser, node);
21254 return result;
21255 }
21256 case PM_CONSTANT_PATH_NODE: {
21257 parser_lex(parser);
21258
21259 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21260 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21261
21262 return parse_shareable_constant_write(parser, write);
21263 }
21264 case PM_CONSTANT_READ_NODE: {
21265 parser_lex(parser);
21266
21267 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21268 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21269
21270 pm_node_destroy(parser, node);
21271 return parse_shareable_constant_write(parser, write);
21272 }
21273 case PM_INSTANCE_VARIABLE_READ_NODE: {
21274 parser_lex(parser);
21275
21276 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21277 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21278
21279 pm_node_destroy(parser, node);
21280 return result;
21281 }
21282 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21283 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21284 parser_lex(parser);
21285
21286 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21287 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21288
21289 parse_target_implicit_parameter(parser, node);
21290 pm_node_destroy(parser, node);
21291 return result;
21292 }
21293 case PM_LOCAL_VARIABLE_READ_NODE: {
21294 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21295 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21296 parse_target_implicit_parameter(parser, node);
21297 }
21298
21300 parser_lex(parser);
21301
21302 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21303 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21304
21305 pm_node_destroy(parser, node);
21306 return result;
21307 }
21308 case PM_CALL_NODE: {
21309 pm_call_node_t *cast = (pm_call_node_t *) node;
21310
21311 // If we have a vcall (a method with no arguments and no
21312 // receiver that could have been a local variable) then we
21313 // will transform it into a local variable write.
21314 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21315 pm_location_t *message_loc = &cast->message_loc;
21316 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21317
21318 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21319 parser_lex(parser);
21320
21321 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21322 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21323
21324 pm_node_destroy(parser, (pm_node_t *) cast);
21325 return result;
21326 }
21327
21328 // Move past the token here so that we have already added
21329 // the local variable by this point.
21330 parser_lex(parser);
21331
21332 // If there is no call operator and the message is "[]" then
21333 // this is an aref expression, and we can transform it into
21334 // an aset expression.
21335 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21336 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21337 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21338 }
21339
21340 // If this node cannot be writable, then we have an error.
21341 if (pm_call_node_writable_p(parser, cast)) {
21342 parse_write_name(parser, &cast->name);
21343 } else {
21344 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21345 }
21346
21347 parse_call_operator_write(parser, cast, &token);
21348 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21349 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21350 }
21351 case PM_MULTI_WRITE_NODE: {
21352 parser_lex(parser);
21353 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21354 return node;
21355 }
21356 default:
21357 parser_lex(parser);
21358
21359 // In this case we have an &&= sign, but we don't know what it's for.
21360 // We need to treat it as an error. For now, we'll mark it as an error
21361 // and just skip right past it.
21362 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21363 return node;
21364 }
21365 }
21366 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21367 switch (PM_NODE_TYPE(node)) {
21368 case PM_BACK_REFERENCE_READ_NODE:
21369 case PM_NUMBERED_REFERENCE_READ_NODE:
21370 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21372 case PM_GLOBAL_VARIABLE_READ_NODE: {
21373 parser_lex(parser);
21374
21375 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21376 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21377
21378 pm_node_destroy(parser, node);
21379 return result;
21380 }
21381 case PM_CLASS_VARIABLE_READ_NODE: {
21382 parser_lex(parser);
21383
21384 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21385 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21386
21387 pm_node_destroy(parser, node);
21388 return result;
21389 }
21390 case PM_CONSTANT_PATH_NODE: {
21391 parser_lex(parser);
21392
21393 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21394 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21395
21396 return parse_shareable_constant_write(parser, write);
21397 }
21398 case PM_CONSTANT_READ_NODE: {
21399 parser_lex(parser);
21400
21401 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21402 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21403
21404 pm_node_destroy(parser, node);
21405 return parse_shareable_constant_write(parser, write);
21406 }
21407 case PM_INSTANCE_VARIABLE_READ_NODE: {
21408 parser_lex(parser);
21409
21410 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21411 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21412
21413 pm_node_destroy(parser, node);
21414 return result;
21415 }
21416 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21417 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21418 parser_lex(parser);
21419
21420 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21421 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21422
21423 parse_target_implicit_parameter(parser, node);
21424 pm_node_destroy(parser, node);
21425 return result;
21426 }
21427 case PM_LOCAL_VARIABLE_READ_NODE: {
21428 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21429 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21430 parse_target_implicit_parameter(parser, node);
21431 }
21432
21434 parser_lex(parser);
21435
21436 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21437 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21438
21439 pm_node_destroy(parser, node);
21440 return result;
21441 }
21442 case PM_CALL_NODE: {
21443 pm_call_node_t *cast = (pm_call_node_t *) node;
21444
21445 // If we have a vcall (a method with no arguments and no
21446 // receiver that could have been a local variable) then we
21447 // will transform it into a local variable write.
21448 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21449 pm_location_t *message_loc = &cast->message_loc;
21450 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21451
21452 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21453 parser_lex(parser);
21454
21455 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21456 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21457
21458 pm_node_destroy(parser, (pm_node_t *) cast);
21459 return result;
21460 }
21461
21462 // Move past the token here so that we have already added
21463 // the local variable by this point.
21464 parser_lex(parser);
21465
21466 // If there is no call operator and the message is "[]" then
21467 // this is an aref expression, and we can transform it into
21468 // an aset expression.
21469 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21470 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21471 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21472 }
21473
21474 // If this node cannot be writable, then we have an error.
21475 if (pm_call_node_writable_p(parser, cast)) {
21476 parse_write_name(parser, &cast->name);
21477 } else {
21478 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21479 }
21480
21481 parse_call_operator_write(parser, cast, &token);
21482 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21483 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21484 }
21485 case PM_MULTI_WRITE_NODE: {
21486 parser_lex(parser);
21487 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21488 return node;
21489 }
21490 default:
21491 parser_lex(parser);
21492
21493 // In this case we have an ||= sign, but we don't know what it's for.
21494 // We need to treat it as an error. For now, we'll mark it as an error
21495 // and just skip right past it.
21496 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21497 return node;
21498 }
21499 }
21500 case PM_TOKEN_AMPERSAND_EQUAL:
21501 case PM_TOKEN_CARET_EQUAL:
21502 case PM_TOKEN_GREATER_GREATER_EQUAL:
21503 case PM_TOKEN_LESS_LESS_EQUAL:
21504 case PM_TOKEN_MINUS_EQUAL:
21505 case PM_TOKEN_PERCENT_EQUAL:
21506 case PM_TOKEN_PIPE_EQUAL:
21507 case PM_TOKEN_PLUS_EQUAL:
21508 case PM_TOKEN_SLASH_EQUAL:
21509 case PM_TOKEN_STAR_EQUAL:
21510 case PM_TOKEN_STAR_STAR_EQUAL: {
21511 switch (PM_NODE_TYPE(node)) {
21512 case PM_BACK_REFERENCE_READ_NODE:
21513 case PM_NUMBERED_REFERENCE_READ_NODE:
21514 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21516 case PM_GLOBAL_VARIABLE_READ_NODE: {
21517 parser_lex(parser);
21518
21519 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21520 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21521
21522 pm_node_destroy(parser, node);
21523 return result;
21524 }
21525 case PM_CLASS_VARIABLE_READ_NODE: {
21526 parser_lex(parser);
21527
21528 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21529 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21530
21531 pm_node_destroy(parser, node);
21532 return result;
21533 }
21534 case PM_CONSTANT_PATH_NODE: {
21535 parser_lex(parser);
21536
21537 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21538 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21539
21540 return parse_shareable_constant_write(parser, write);
21541 }
21542 case PM_CONSTANT_READ_NODE: {
21543 parser_lex(parser);
21544
21545 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21546 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21547
21548 pm_node_destroy(parser, node);
21549 return parse_shareable_constant_write(parser, write);
21550 }
21551 case PM_INSTANCE_VARIABLE_READ_NODE: {
21552 parser_lex(parser);
21553
21554 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21555 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21556
21557 pm_node_destroy(parser, node);
21558 return result;
21559 }
21560 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21561 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21562 parser_lex(parser);
21563
21564 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21565 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21566
21567 parse_target_implicit_parameter(parser, node);
21568 pm_node_destroy(parser, node);
21569 return result;
21570 }
21571 case PM_LOCAL_VARIABLE_READ_NODE: {
21572 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21573 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21574 parse_target_implicit_parameter(parser, node);
21575 }
21576
21578 parser_lex(parser);
21579
21580 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21581 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21582
21583 pm_node_destroy(parser, node);
21584 return result;
21585 }
21586 case PM_CALL_NODE: {
21587 parser_lex(parser);
21588 pm_call_node_t *cast = (pm_call_node_t *) node;
21589
21590 // If we have a vcall (a method with no arguments and no
21591 // receiver that could have been a local variable) then we
21592 // will transform it into a local variable write.
21593 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21594 pm_location_t *message_loc = &cast->message_loc;
21595 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21596
21597 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21598 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21599 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21600
21601 pm_node_destroy(parser, (pm_node_t *) cast);
21602 return result;
21603 }
21604
21605 // If there is no call operator and the message is "[]" then
21606 // this is an aref expression, and we can transform it into
21607 // an aset expression.
21608 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21609 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21610 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21611 }
21612
21613 // If this node cannot be writable, then we have an error.
21614 if (pm_call_node_writable_p(parser, cast)) {
21615 parse_write_name(parser, &cast->name);
21616 } else {
21617 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21618 }
21619
21620 parse_call_operator_write(parser, cast, &token);
21621 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21622 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21623 }
21624 case PM_MULTI_WRITE_NODE: {
21625 parser_lex(parser);
21626 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21627 return node;
21628 }
21629 default:
21630 parser_lex(parser);
21631
21632 // In this case we have an operator but we don't know what it's for.
21633 // We need to treat it as an error. For now, we'll mark it as an error
21634 // and just skip right past it.
21635 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21636 return node;
21637 }
21638 }
21639 case PM_TOKEN_AMPERSAND_AMPERSAND:
21640 case PM_TOKEN_KEYWORD_AND: {
21641 parser_lex(parser);
21642
21643 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21644 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21645 }
21646 case PM_TOKEN_KEYWORD_OR:
21647 case PM_TOKEN_PIPE_PIPE: {
21648 parser_lex(parser);
21649
21650 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21651 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21652 }
21653 case PM_TOKEN_EQUAL_TILDE: {
21654 // Note that we _must_ parse the value before adding the local
21655 // variables in order to properly mirror the behavior of Ruby. For
21656 // example,
21657 //
21658 // /(?<foo>bar)/ =~ foo
21659 //
21660 // In this case, `foo` should be a method call and not a local yet.
21661 parser_lex(parser);
21662 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21663
21664 // By default, we're going to create a call node and then return it.
21665 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21666 pm_node_t *result = (pm_node_t *) call;
21667
21668 // If the receiver of this =~ is a regular expression node, then we
21669 // need to introduce local variables for it based on its named
21670 // capture groups.
21671 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21672 // It's possible to have an interpolated regular expression node
21673 // that only contains strings. This is because it can be split
21674 // up by a heredoc. In this case we need to concat the unescaped
21675 // strings together and then parse them as a regular expression.
21677
21678 bool interpolated = false;
21679 size_t total_length = 0;
21680
21681 pm_node_t *part;
21682 PM_NODE_LIST_FOREACH(parts, index, part) {
21683 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21684 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21685 } else {
21686 interpolated = true;
21687 break;
21688 }
21689 }
21690
21691 if (!interpolated && total_length > 0) {
21692 void *memory = xmalloc(total_length);
21693 if (!memory) abort();
21694
21695 uint8_t *cursor = memory;
21696 PM_NODE_LIST_FOREACH(parts, index, part) {
21697 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21698 size_t length = pm_string_length(unescaped);
21699
21700 memcpy(cursor, pm_string_source(unescaped), length);
21701 cursor += length;
21702 }
21703
21704 pm_string_t owned;
21705 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21706
21707 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21708 pm_string_free(&owned);
21709 }
21710 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21711 // If we have a regular expression node, then we can just parse
21712 // the named captures directly off the unescaped string.
21713 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21714 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21715 }
21716
21717 return result;
21718 }
21719 case PM_TOKEN_UAMPERSAND:
21720 case PM_TOKEN_USTAR:
21721 case PM_TOKEN_USTAR_STAR:
21722 // The only times this will occur are when we are in an error state,
21723 // but we'll put them in here so that errors can propagate.
21724 case PM_TOKEN_BANG_EQUAL:
21725 case PM_TOKEN_BANG_TILDE:
21726 case PM_TOKEN_EQUAL_EQUAL:
21727 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21728 case PM_TOKEN_LESS_EQUAL_GREATER:
21729 case PM_TOKEN_CARET:
21730 case PM_TOKEN_PIPE:
21731 case PM_TOKEN_AMPERSAND:
21732 case PM_TOKEN_GREATER_GREATER:
21733 case PM_TOKEN_LESS_LESS:
21734 case PM_TOKEN_MINUS:
21735 case PM_TOKEN_PLUS:
21736 case PM_TOKEN_PERCENT:
21737 case PM_TOKEN_SLASH:
21738 case PM_TOKEN_STAR:
21739 case PM_TOKEN_STAR_STAR: {
21740 parser_lex(parser);
21741 pm_token_t operator = parser->previous;
21742 switch (PM_NODE_TYPE(node)) {
21743 case PM_RESCUE_MODIFIER_NODE: {
21745 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21746 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21747 }
21748 break;
21749 }
21750 case PM_AND_NODE: {
21751 pm_and_node_t *cast = (pm_and_node_t *) node;
21752 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21753 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21754 }
21755 break;
21756 }
21757 case PM_OR_NODE: {
21758 pm_or_node_t *cast = (pm_or_node_t *) node;
21759 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21760 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21761 }
21762 break;
21763 }
21764 default:
21765 break;
21766 }
21767
21768 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21769 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21770 }
21771 case PM_TOKEN_GREATER:
21772 case PM_TOKEN_GREATER_EQUAL:
21773 case PM_TOKEN_LESS:
21774 case PM_TOKEN_LESS_EQUAL: {
21775 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21776 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21777 }
21778
21779 parser_lex(parser);
21780 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21781 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21782 }
21783 case PM_TOKEN_AMPERSAND_DOT:
21784 case PM_TOKEN_DOT: {
21785 parser_lex(parser);
21786 pm_token_t operator = parser->previous;
21787 pm_arguments_t arguments = { 0 };
21788
21789 // This if statement handles the foo.() syntax.
21790 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21791 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21792 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21793 }
21794
21795 switch (PM_NODE_TYPE(node)) {
21796 case PM_RESCUE_MODIFIER_NODE: {
21798 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21799 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21800 }
21801 break;
21802 }
21803 case PM_AND_NODE: {
21804 pm_and_node_t *cast = (pm_and_node_t *) node;
21805 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21806 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21807 }
21808 break;
21809 }
21810 case PM_OR_NODE: {
21811 pm_or_node_t *cast = (pm_or_node_t *) node;
21812 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21813 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21814 }
21815 break;
21816 }
21817 default:
21818 break;
21819 }
21820
21821 pm_token_t message;
21822
21823 switch (parser->current.type) {
21824 case PM_CASE_OPERATOR:
21825 case PM_CASE_KEYWORD:
21826 case PM_TOKEN_CONSTANT:
21827 case PM_TOKEN_IDENTIFIER:
21828 case PM_TOKEN_METHOD_NAME: {
21829 parser_lex(parser);
21830 message = parser->previous;
21831 break;
21832 }
21833 default: {
21834 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21835 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21836 }
21837 }
21838
21839 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21840 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21841
21842 if (
21843 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21844 arguments.arguments == NULL &&
21845 arguments.opening_loc.start == NULL &&
21846 match1(parser, PM_TOKEN_COMMA)
21847 ) {
21848 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21849 } else {
21850 return (pm_node_t *) call;
21851 }
21852 }
21853 case PM_TOKEN_DOT_DOT:
21854 case PM_TOKEN_DOT_DOT_DOT: {
21855 parser_lex(parser);
21856
21857 pm_node_t *right = NULL;
21858 if (token_begins_expression_p(parser->current.type)) {
21859 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21860 }
21861
21862 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21863 }
21864 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21865 pm_token_t keyword = parser->current;
21866 parser_lex(parser);
21867
21868 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21869 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21870 }
21871 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21872 pm_token_t keyword = parser->current;
21873 parser_lex(parser);
21874
21875 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21876 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21877 }
21878 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21879 parser_lex(parser);
21880 pm_statements_node_t *statements = pm_statements_node_create(parser);
21881 pm_statements_node_body_append(parser, statements, node, true);
21882
21883 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21884 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21885 }
21886 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21887 parser_lex(parser);
21888 pm_statements_node_t *statements = pm_statements_node_create(parser);
21889 pm_statements_node_body_append(parser, statements, node, true);
21890
21891 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21892 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21893 }
21894 case PM_TOKEN_QUESTION_MARK: {
21895 context_push(parser, PM_CONTEXT_TERNARY);
21896 pm_node_list_t current_block_exits = { 0 };
21897 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21898
21899 pm_token_t qmark = parser->current;
21900 parser_lex(parser);
21901
21902 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21903
21904 if (parser->recovering) {
21905 // If parsing the true expression of this ternary resulted in a syntax
21906 // error that we can recover from, then we're going to put missing nodes
21907 // and tokens into the remaining places. We want to be sure to do this
21908 // before the `expect` function call to make sure it doesn't
21909 // accidentally move past a ':' token that occurs after the syntax
21910 // error.
21911 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21912 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21913
21914 context_pop(parser);
21915 pop_block_exits(parser, previous_block_exits);
21916 pm_node_list_free(&current_block_exits);
21917
21918 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21919 }
21920
21921 accept1(parser, PM_TOKEN_NEWLINE);
21922 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21923
21924 pm_token_t colon = parser->previous;
21925 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21926
21927 context_pop(parser);
21928 pop_block_exits(parser, previous_block_exits);
21929 pm_node_list_free(&current_block_exits);
21930
21931 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21932 }
21933 case PM_TOKEN_COLON_COLON: {
21934 parser_lex(parser);
21935 pm_token_t delimiter = parser->previous;
21936
21937 switch (parser->current.type) {
21938 case PM_TOKEN_CONSTANT: {
21939 parser_lex(parser);
21940 pm_node_t *path;
21941
21942 if (
21943 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21944 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21945 ) {
21946 // If we have a constant immediately following a '::' operator, then
21947 // this can either be a constant path or a method call, depending on
21948 // what follows the constant.
21949 //
21950 // If we have parentheses, then this is a method call. That would
21951 // look like Foo::Bar().
21952 pm_token_t message = parser->previous;
21953 pm_arguments_t arguments = { 0 };
21954
21955 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21956 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21957 } else {
21958 // Otherwise, this is a constant path. That would look like Foo::Bar.
21959 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21960 }
21961
21962 // If this is followed by a comma then it is a multiple assignment.
21963 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21964 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21965 }
21966
21967 return path;
21968 }
21969 case PM_CASE_OPERATOR:
21970 case PM_CASE_KEYWORD:
21971 case PM_TOKEN_IDENTIFIER:
21972 case PM_TOKEN_METHOD_NAME: {
21973 parser_lex(parser);
21974 pm_token_t message = parser->previous;
21975
21976 // If we have an identifier following a '::' operator, then it is for
21977 // sure a method call.
21978 pm_arguments_t arguments = { 0 };
21979 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21980 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21981
21982 // If this is followed by a comma then it is a multiple assignment.
21983 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21984 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21985 }
21986
21987 return (pm_node_t *) call;
21988 }
21989 case PM_TOKEN_PARENTHESIS_LEFT: {
21990 // If we have a parenthesis following a '::' operator, then it is the
21991 // method call shorthand. That would look like Foo::(bar).
21992 pm_arguments_t arguments = { 0 };
21993 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21994
21995 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21996 }
21997 default: {
21998 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21999 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
22000 }
22001 }
22002 }
22003 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
22004 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
22005 parser_lex(parser);
22006 accept1(parser, PM_TOKEN_NEWLINE);
22007
22008 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
22009 context_pop(parser);
22010
22011 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
22012 }
22013 case PM_TOKEN_BRACKET_LEFT: {
22014 parser_lex(parser);
22015
22016 pm_arguments_t arguments = { 0 };
22017 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22018
22019 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
22020 pm_accepts_block_stack_push(parser, true);
22021 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
22022 pm_accepts_block_stack_pop(parser);
22023 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
22024 }
22025
22026 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
22027
22028 // If we have a comma after the closing bracket then this is a multiple
22029 // assignment and we should parse the targets.
22030 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
22031 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
22032 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
22033 }
22034
22035 // If we're at the end of the arguments, we can now check if there is a
22036 // block node that starts with a {. If there is, then we can parse it and
22037 // add it to the arguments.
22038 pm_block_node_t *block = NULL;
22039 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
22040 block = parse_block(parser, (uint16_t) (depth + 1));
22041 pm_arguments_validate_block(parser, &arguments, block);
22042 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
22043 block = parse_block(parser, (uint16_t) (depth + 1));
22044 }
22045
22046 if (block != NULL) {
22047 if (arguments.block != NULL) {
22048 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
22049 if (arguments.arguments == NULL) {
22050 arguments.arguments = pm_arguments_node_create(parser);
22051 }
22052 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
22053 }
22054
22055 arguments.block = (pm_node_t *) block;
22056 }
22057
22058 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
22059 }
22060 case PM_TOKEN_KEYWORD_IN: {
22061 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22062 parser->pattern_matching_newlines = true;
22063
22064 pm_token_t operator = parser->current;
22065 parser->command_start = false;
22066 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22067 parser_lex(parser);
22068
22069 pm_constant_id_list_t captures = { 0 };
22070 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
22071
22072 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22073 pm_constant_id_list_free(&captures);
22074
22075 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
22076 }
22077 case PM_TOKEN_EQUAL_GREATER: {
22078 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
22079 parser->pattern_matching_newlines = true;
22080
22081 pm_token_t operator = parser->current;
22082 parser->command_start = false;
22083 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
22084 parser_lex(parser);
22085
22086 pm_constant_id_list_t captures = { 0 };
22087 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
22088
22089 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
22090 pm_constant_id_list_free(&captures);
22091
22092 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
22093 }
22094 default:
22095 assert(false && "unreachable");
22096 return NULL;
22097 }
22098}
22099
22100#undef PM_PARSE_PATTERN_SINGLE
22101#undef PM_PARSE_PATTERN_TOP
22102#undef PM_PARSE_PATTERN_MULTI
22103
22108static inline bool
22109pm_call_node_command_p(const pm_call_node_t *node) {
22110 return (
22111 (node->opening_loc.start == NULL) &&
22112 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
22113 (node->arguments != NULL || node->block != NULL)
22114 );
22115}
22116
/**
 * Parse an expression at (or above) the given binding power.
 *
 * The function first parses a prefix expression with parse_expression_prefix
 * and then, for as long as the current token is a binary operator whose left
 * binding power is at least `binding_power`, folds it into the result with
 * parse_expression_infix.
 *
 * @param parser The parser being driven.
 * @param binding_power The minimum left binding power an infix operator must
 *     have to be consumed by this call.
 * @param accepts_command_call Forwarded to the prefix and infix parsers; it is
 *     cleared inside the loop once the parsed node can no longer continue a
 *     method chain.
 * @param accepts_label Forwarded to parse_expression_prefix.
 * @param diag_id Forwarded to parse_expression_prefix.
 * @param depth The current nesting depth. When it reaches
 *     PRISM_DEPTH_MAXIMUM, PM_ERR_NESTING_TOO_DEEP is reported and a missing
 *     node is returned instead of recursing further.
 * @return The parsed node.
 */
22125static pm_node_t *
22126parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
// Guard against unbounded recursion before doing any work.
22127 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
22128 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
22129 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
22130 }
22131
22132 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
22133
// Some prefix results end the expression early, before any infix operator
// is considered.
22134 switch (PM_NODE_TYPE(node)) {
22135 case PM_MISSING_NODE:
22136 // If we found a syntax error, then the type of node returned by
22137 // parse_expression_prefix is going to be a missing node.
22138 return node;
22139 case PM_PRE_EXECUTION_NODE:
22140 case PM_POST_EXECUTION_NODE:
22141 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22142 case PM_ALIAS_METHOD_NODE:
22143 case PM_MULTI_WRITE_NODE:
22144 case PM_UNDEF_NODE:
22145 // These expressions are statements, and cannot be followed by
22146 // operators (except modifiers).
22147 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22148 return node;
22149 }
22150 break;
22151 case PM_CALL_NODE:
22152 // If we have a call node, then we need to check if it looks like a
22153 // method call without parentheses that contains arguments. If it
22154 // does, then it has different rules for parsing infix operators,
22155 // namely that it only accepts composition (and/or) and modifiers
22156 // (if/unless/etc.).
22157 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
22158 return node;
22159 }
22160 break;
22161 case PM_SYMBOL_NODE:
22162 // If we have a symbol node that is being parsed as a label, then we
22163 // need to immediately return, because there should never be an
22164 // infix operator following this node.
22165 if (pm_symbol_node_label_p(node)) {
22166 return node;
22167 }
22168 break;
22169 default:
22170 break;
22171 }
22172
22173 // Otherwise we'll look and see if the next token can be parsed as an infix
22174 // operator. If it can, then we'll parse it using parse_expression_infix.
22175 pm_binding_powers_t current_binding_powers;
22176 pm_token_type_t current_token_type;
22177
// The loop condition uses comma operators: on every iteration it first
// re-reads the current token type and its binding powers, and only the final
// `&&` expression decides whether the loop continues.
22178 while (
22179 current_token_type = parser->current.type,
22180 current_binding_powers = pm_binding_powers[current_token_type],
22181 binding_power <= current_binding_powers.left &&
22182 current_binding_powers.binary
22183 ) {
22184 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
22185
22186 switch (PM_NODE_TYPE(node)) {
22187 case PM_MULTI_WRITE_NODE:
22188 // Multi-write nodes are statements, and cannot be followed by
22189 // operators except modifiers.
22190 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22191 return node;
22192 }
22193 break;
22194 case PM_CLASS_VARIABLE_WRITE_NODE:
22195 case PM_CONSTANT_PATH_WRITE_NODE:
22196 case PM_CONSTANT_WRITE_NODE:
22197 case PM_GLOBAL_VARIABLE_WRITE_NODE:
22198 case PM_INSTANCE_VARIABLE_WRITE_NODE:
22199 case PM_LOCAL_VARIABLE_WRITE_NODE:
22200 // These expressions are statements, by virtue of the right-hand
22201 // side of their write being an implicit array.
22202 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22203 return node;
22204 }
22205 break;
22206 case PM_CALL_NODE:
22207 // These expressions are also statements, by virtue of the
22208 // right-hand side of the expression (i.e., the last argument to
22209 // the call node) being an implicit array.
22210 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22211 return node;
22212 }
22213 break;
22214 default:
22215 break;
22216 }
22217
22218 // If the operator is nonassoc and we should not be able to parse the
22219 // upcoming infix operator, break.
22220 if (current_binding_powers.nonassoc) {
22221 // If this is a non-assoc operator and we are about to parse the
22222 // exact same operator, then we need to add an error.
22223 if (match1(parser, current_token_type)) {
22224 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22225 break;
22226 }
22227
22228 // If this is an endless range, then we need to reject a couple of
22229 // additional operators because it violates the normal operator
22230 // precedence rules. Those patterns are:
22231 //
22232 // 1.. & 2
22233 // 1.. * 2
22234 //
22235 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22236 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22237 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22238 break;
22239 }
22240
22241 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22242 break;
22243 }
22244 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22245 break;
22246 }
22247 }
22248
22249 if (accepts_command_call) {
22250 // A command-style method call is only accepted on method chains.
22251 // Thus, we check whether the parsed node can continue method chains.
22252 // The method chain can continue if the parsed node is one of the following five kinds:
22253 // (1) index access: foo[1]
22254 // (2) attribute access: foo.bar
22255 // (3) method call with parenthesis: foo.bar(1)
22256 // (4) method call with a block: foo.bar do end
22257 // (5) constant path: foo::Bar
// NOTE(review): this switch reads node->type directly, whereas the rest of
// this function goes through PM_NODE_TYPE(node) — confirm the two are
// interchangeable here.
22258 switch (node->type) {
22259 case PM_CALL_NODE: {
22260 pm_call_node_t *cast = (pm_call_node_t *)node;
// The condition below is true only when the call matches none of shapes
// (1) through (4); in that case command calls are no longer accepted.
22261 if (
22262 // (1) foo[1]
22263 !(
22264 cast->call_operator_loc.start == NULL &&
22265 cast->message_loc.start != NULL &&
22266 cast->message_loc.start[0] == '[' &&
22267 cast->message_loc.end[-1] == ']'
22268 ) &&
22269 // (2) foo.bar
22270 !(
22271 cast->call_operator_loc.start != NULL &&
22272 cast->arguments == NULL &&
22273 cast->block == NULL &&
22274 cast->opening_loc.start == NULL
22275 ) &&
22276 // (3) foo.bar(1)
22277 !(
22278 cast->call_operator_loc.start != NULL &&
22279 cast->opening_loc.start != NULL
22280 ) &&
22281 // (4) foo.bar do end
22282 !(
22283 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22284 )
22285 ) {
22286 accepts_command_call = false;
22287 }
22288 break;
22289 }
22290 // (5) foo::Bar
22291 case PM_CONSTANT_PATH_NODE:
22292 break;
22293 default:
22294 accepts_command_call = false;
22295 break;
22296 }
22297 }
22298 }
22299
22300 return node;
22301}
22302
22307static pm_statements_node_t *
22308wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22309 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22310 if (statements == NULL) {
22311 statements = pm_statements_node_create(parser);
22312 }
22313
22314 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22315 pm_arguments_node_arguments_append(
22316 arguments,
22317 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22318 );
22319
22320 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22321 parser,
22322 arguments,
22323 pm_parser_constant_id_constant(parser, "print", 5)
22324 ), true);
22325 }
22326
22327 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22328 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22329 if (statements == NULL) {
22330 statements = pm_statements_node_create(parser);
22331 }
22332
22333 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22334 pm_arguments_node_arguments_append(
22335 arguments,
22336 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22337 );
22338
22339 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22340 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22341
22342 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22343 parser,
22344 pm_parser_constant_id_constant(parser, "$F", 2),
22345 (pm_node_t *) call
22346 );
22347
22348 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22349 }
22350
22351 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22352 pm_arguments_node_arguments_append(
22353 arguments,
22354 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22355 );
22356
22357 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22358 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22359 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22360 parser,
22361 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22362 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22363 (pm_node_t *) pm_true_node_synthesized_create(parser)
22364 ));
22365
22366 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22367 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22368 }
22369
22370 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22371 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22372 parser,
22373 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22374 statements
22375 ), true);
22376
22377 statements = wrapped_statements;
22378 }
22379
22380 return statements;
22381}
22382
22386static pm_node_t *
22387parse_program(pm_parser_t *parser) {
22388 // If the current scope is NULL, then we want to push a new top level scope.
22389 // The current scope could exist in the event that we are parsing an eval
22390 // and the user has passed into scopes that already exist.
22391 if (parser->current_scope == NULL) {
22392 pm_parser_scope_push(parser, true);
22393 }
22394
22395 pm_node_list_t current_block_exits = { 0 };
22396 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22397
22398 parser_lex(parser);
22399 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22400
22401 if (statements != NULL && !parser->parsing_eval) {
22402 // If we have statements, then the top-level statement should be
22403 // explicitly checked as well. We have to do this here because
22404 // everywhere else we check all but the last statement.
22405 assert(statements->body.size > 0);
22406 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22407 }
22408
22409 pm_constant_id_list_t locals;
22410 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22411 pm_parser_scope_pop(parser);
22412
22413 // At the top level, see if we need to wrap the statements in a program
22414 // node with a while loop based on the options.
22416 statements = wrap_statements(parser, statements);
22417 } else {
22418 flush_block_exits(parser, previous_block_exits);
22419 pm_node_list_free(&current_block_exits);
22420 }
22421
22422 // If this is an empty file, then we're still going to parse all of the
22423 // statements in order to gather up all of the comments and such. Here we'll
22424 // correct the location information.
22425 if (statements == NULL) {
22426 statements = pm_statements_node_create(parser);
22427 pm_statements_node_location_set(statements, parser->start, parser->start);
22428 }
22429
22430 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22431}
22432
22433/******************************************************************************/
22434/* External functions */
22435/******************************************************************************/
22436
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be
 * NUL-terminated.
 *
 * A match is only reported when it lies entirely inside the bounded region,
 * and no byte at or beyond `big + big_length` is ever read. As with BSD
 * strnstr(3), an empty `little` matches at `big`.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to look for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, or NULL if there is none.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    if (little_length == 0) return big;
    if (little_length > big_length) return NULL;

    // The last position at which a full-length match still fits in the region.
    // Stopping here keeps memcmp from reading beyond the bounded region.
    const char *last_start = big + (big_length - little_length);

    for (; big <= last_start; big++) {
        // Compare the first byte before paying for the full memcmp.
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22456
22457#ifdef _WIN32
22458#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22459#else
22465static void
22466pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22467 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22468 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22469 }
22470}
22471#endif
22472
22477static void
22478pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22479 const char *switches = pm_strnstr(engine, " -", length);
22480 if (switches == NULL) return;
22481
22482 pm_options_t next_options = *options;
22483 options->shebang_callback(
22484 &next_options,
22485 (const uint8_t *) (switches + 1),
22486 length - ((size_t) (switches - engine)) - 1,
22487 options->shebang_callback_data
22488 );
22489
22490 size_t encoding_length;
22491 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22492 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22493 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22494 }
22495
22496 parser->command_line = next_options.command_line;
22497 parser->frozen_string_literal = next_options.frozen_string_literal;
22498}
22499
22504pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22505 assert(source != NULL);
22506
22507 *parser = (pm_parser_t) {
22508 .node_id = 0,
22509 .lex_state = PM_LEX_STATE_BEG,
22510 .enclosure_nesting = 0,
22511 .lambda_enclosure_nesting = -1,
22512 .brace_nesting = 0,
22513 .do_loop_stack = 0,
22514 .accepts_block_stack = 0,
22515 .lex_modes = {
22516 .index = 0,
22517 .stack = {{ .mode = PM_LEX_DEFAULT }},
22518 .current = &parser->lex_modes.stack[0],
22519 },
22520 .start = source,
22521 .end = source + size,
22522 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22523 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22524 .next_start = NULL,
22525 .heredoc_end = NULL,
22526 .data_loc = { .start = NULL, .end = NULL },
22527 .comment_list = { 0 },
22528 .magic_comment_list = { 0 },
22529 .warning_list = { 0 },
22530 .error_list = { 0 },
22531 .current_scope = NULL,
22532 .current_context = NULL,
22533 .encoding = PM_ENCODING_UTF_8_ENTRY,
22534 .encoding_changed_callback = NULL,
22535 .encoding_comment_start = source,
22536 .lex_callback = NULL,
22537 .filepath = { 0 },
22538 .constant_pool = { 0 },
22539 .newline_list = { 0 },
22540 .integer_base = 0,
22541 .current_string = PM_STRING_EMPTY,
22542 .start_line = 1,
22543 .explicit_encoding = NULL,
22544 .command_line = 0,
22545 .parsing_eval = false,
22546 .partial_script = false,
22547 .command_start = true,
22548 .recovering = false,
22549 .encoding_locked = false,
22550 .encoding_changed = false,
22551 .pattern_matching_newlines = false,
22552 .in_keyword_arg = false,
22553 .current_block_exits = NULL,
22554 .semantic_token_seen = false,
22555 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22556 .current_regular_expression_ascii_only = false,
22557 .warn_mismatched_indentation = true
22558 };
22559
22560 // Initialize the constant pool. We're going to completely guess as to the
22561 // number of constants that we'll need based on the size of the input. The
22562 // ratio we chose here is actually less arbitrary than you might think.
22563 //
22564 // We took ~50K Ruby files and measured the size of the file versus the
22565 // number of constants that were found in those files. Then we found the
22566 // average and standard deviation of the ratios of constants/bytesize. Then
22567 // we added 1.34 standard deviations to the average to get a ratio that
22568 // would fit 75% of the files (for a two-tailed distribution). This works
22569 // because there was about a 0.77 correlation and the distribution was
22570 // roughly normal.
22571 //
22572 // This ratio will need to change if we add more constants to the constant
22573 // pool for another node type.
22574 uint32_t constant_size = ((uint32_t) size) / 95;
22575 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22576
22577 // Initialize the newline list. Similar to the constant pool, we're going to
22578 // guess at the number of newlines that we'll need based on the size of the
22579 // input.
22580 size_t newline_size = size / 22;
22581 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22582
22583 // If options were provided to this parse, establish them here.
22584 if (options != NULL) {
22585 // filepath option
22586 parser->filepath = options->filepath;
22587
22588 // line option
22589 parser->start_line = options->line;
22590
22591 // encoding option
22592 size_t encoding_length = pm_string_length(&options->encoding);
22593 if (encoding_length > 0) {
22594 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22595 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22596 }
22597
22598 // encoding_locked option
22599 parser->encoding_locked = options->encoding_locked;
22600
22601 // frozen_string_literal option
22603
22604 // command_line option
22605 parser->command_line = options->command_line;
22606
22607 // version option
22608 parser->version = options->version;
22609
22610 // partial_script
22611 parser->partial_script = options->partial_script;
22612
22613 // scopes option
22614 parser->parsing_eval = options->scopes_count > 0;
22615 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22616
22617 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22618 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22619 pm_parser_scope_push(parser, scope_index == 0);
22620
22621 // Scopes given from the outside are not allowed to have numbered
22622 // parameters.
22623 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22624
22625 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22626 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22627
22628 const uint8_t *source = pm_string_source(local);
22629 size_t length = pm_string_length(local);
22630
22631 void *allocated = xmalloc(length);
22632 if (allocated == NULL) continue;
22633
22634 memcpy(allocated, source, length);
22635 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22636 }
22637 }
22638 }
22639
22640 pm_accepts_block_stack_push(parser, true);
22641
22642 // Skip past the UTF-8 BOM if it exists.
22643 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22644 parser->current.end += 3;
22645 parser->encoding_comment_start += 3;
22646
22647 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22649 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22650 }
22651 }
22652
22653 // If the -x command line flag is set, or the first shebang of the file does
22654 // not include "ruby", then we'll search for a shebang that does include
22655 // "ruby" and start parsing from there.
22656 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22657
22658 // If the first two bytes of the source are a shebang, then we will do a bit
22659 // of extra processing.
22660 //
22661 // First, we'll indicate that the encoding comment is at the end of the
22662 // shebang. This means that when a shebang is present the encoding comment
22663 // can begin on the second line.
22664 //
22665 // Second, we will check if the shebang includes "ruby". If it does, then we
22666 // we will start parsing from there. We will also potentially warning the
22667 // user if there is a carriage return at the end of the shebang. We will
22668 // also potentially call the shebang callback if this is the main script to
22669 // allow the caller to parse the shebang and find any command-line options.
22670 // If the shebang does not include "ruby" and this is the main script being
22671 // parsed, then we will start searching the file for a shebang that does
22672 // contain "ruby" as if -x were passed on the command line.
22673 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22674 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22675
22676 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22677 const char *engine;
22678
22679 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22680 if (newline != NULL) {
22681 parser->encoding_comment_start = newline + 1;
22682
22683 if (options == NULL || options->main_script) {
22684 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22685 }
22686 }
22687
22688 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22689 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22690 }
22691
22692 search_shebang = false;
22693 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22694 search_shebang = true;
22695 }
22696 }
22697
22698 // Here we're going to find the first shebang that includes "ruby" and start
22699 // parsing from there.
22700 if (search_shebang) {
22701 // If a shebang that includes "ruby" is not found, then we're going to a
22702 // a load error to the list of errors on the parser.
22703 bool found_shebang = false;
22704
22705 // This is going to point to the start of each line as we check it.
22706 // We'll maintain a moving window looking at each line at they come.
22707 const uint8_t *cursor = parser->start;
22708
22709 // The newline pointer points to the end of the current line that we're
22710 // considering. If it is NULL, then we're at the end of the file.
22711 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22712
22713 while (newline != NULL) {
22714 pm_newline_list_append(&parser->newline_list, newline);
22715
22716 cursor = newline + 1;
22717 newline = next_newline(cursor, parser->end - cursor);
22718
22719 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22720 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22721 const char *engine;
22722 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22723 found_shebang = true;
22724
22725 if (newline != NULL) {
22726 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22727 parser->encoding_comment_start = newline + 1;
22728 }
22729
22730 if (options != NULL && options->shebang_callback != NULL) {
22731 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22732 }
22733
22734 break;
22735 }
22736 }
22737 }
22738
22739 if (found_shebang) {
22740 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22741 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22742 } else {
22743 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22744 pm_newline_list_clear(&parser->newline_list);
22745 }
22746 }
22747
22748 // The encoding comment can start after any amount of inline whitespace, so
22749 // here we'll advance it to the first non-inline-whitespace character so
22750 // that it is ready for future comparisons.
22751 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22752}
22753
22759pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22760 parser->encoding_changed_callback = callback;
22761}
22762
22766static inline void
22767pm_comment_list_free(pm_list_t *list) {
22768 pm_list_node_t *node, *next;
22769
22770 for (node = list->head; node != NULL; node = next) {
22771 next = node->next;
22772
22773 pm_comment_t *comment = (pm_comment_t *) node;
22774 xfree(comment);
22775 }
22776}
22777
22781static inline void
22782pm_magic_comment_list_free(pm_list_t *list) {
22783 pm_list_node_t *node, *next;
22784
22785 for (node = list->head; node != NULL; node = next) {
22786 next = node->next;
22787
22790 }
22791}
22792
22797pm_parser_free(pm_parser_t *parser) {
22798 pm_string_free(&parser->filepath);
22799 pm_diagnostic_list_free(&parser->error_list);
22800 pm_diagnostic_list_free(&parser->warning_list);
22801 pm_comment_list_free(&parser->comment_list);
22802 pm_magic_comment_list_free(&parser->magic_comment_list);
22803 pm_constant_pool_free(&parser->constant_pool);
22804 pm_newline_list_free(&parser->newline_list);
22805
22806 while (parser->current_scope != NULL) {
22807 // Normally, popping the scope doesn't free the locals since it is
22808 // assumed that ownership has transferred to the AST. However if we have
22809 // scopes while we're freeing the parser, it's likely they came from
22810 // eval scopes and we need to free them explicitly here.
22811 pm_parser_scope_pop(parser);
22812 }
22813
22814 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22815 lex_mode_pop(parser);
22816 }
22817}
22818
22823pm_parse(pm_parser_t *parser) {
22824 return parse_program(parser);
22825}
22826
22832static bool
22833pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
22834#define LINE_SIZE 4096
22835 char line[LINE_SIZE];
22836
22837 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22838 size_t length = LINE_SIZE;
22839 while (length > 0 && line[length - 1] == '\n') length--;
22840
22841 if (length == LINE_SIZE) {
22842 // If we read a line that is the maximum size and it doesn't end
22843 // with a newline, then we'll just append it to the buffer and
22844 // continue reading.
22845 length--;
22846 pm_buffer_append_string(buffer, line, length);
22847 continue;
22848 }
22849
22850 // Append the line to the buffer.
22851 length--;
22852 pm_buffer_append_string(buffer, line, length);
22853
22854 // Check if the line matches the __END__ marker. If it does, then stop
22855 // reading and return false. In most circumstances, this means we should
22856 // stop reading from the stream so that the DATA constant can pick it
22857 // up.
22858 switch (length) {
22859 case 7:
22860 if (strncmp(line, "__END__", 7) == 0) return false;
22861 break;
22862 case 8:
22863 if (strncmp(line, "__END__\n", 8) == 0) return false;
22864 break;
22865 case 9:
22866 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22867 break;
22868 }
22869 }
22870
22871 return true;
22872#undef LINE_SIZE
22873}
22874
22884static bool
22885pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22886 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22887
22888 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22889 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22890 return true;
22891 }
22892 }
22893
22894 return false;
22895}
22896
22904pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
22905 pm_buffer_init(buffer);
22906
22907 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22908 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22909 pm_node_t *node = pm_parse(parser);
22910
22911 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22912 pm_node_destroy(parser, node);
22913 eof = pm_parse_stream_read(buffer, stream, stream_fgets);
22914
22915 pm_parser_free(parser);
22916 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22917 node = pm_parse(parser);
22918 }
22919
22920 return node;
22921}
22922
22927pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22928 pm_options_t options = { 0 };
22929 pm_options_read(&options, data);
22930
22931 pm_parser_t parser;
22932 pm_parser_init(&parser, source, size, &options);
22933
22934 pm_node_t *node = pm_parse(&parser);
22935 pm_node_destroy(&parser, node);
22936
22937 bool result = parser.error_list.size == 0;
22938 pm_parser_free(&parser);
22939 pm_options_free(&options);
22940
22941 return result;
22942}
22943
22944#undef PM_CASE_KEYWORD
22945#undef PM_CASE_OPERATOR
22946#undef PM_CASE_WRITABLE
22947#undef PM_STRING_EMPTY
22948#undef PM_LOCATION_NODE_BASE_VALUE
22949#undef PM_LOCATION_NODE_VALUE
22950#undef PM_LOCATION_NULL_VALUE
22951#undef PM_LOCATION_TOKEN_VALUE
22952
22953// We optionally support serializing to a binary string. For systems that don't
22954// want or need this functionality, it can be turned off with the
22955// PRISM_EXCLUDE_SERIALIZATION define.
22956#ifndef PRISM_EXCLUDE_SERIALIZATION
22957
22958static inline void
22959pm_serialize_header(pm_buffer_t *buffer) {
22960 pm_buffer_append_string(buffer, "PRISM", 5);
22961 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22962 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22963 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22964 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22965}
22966
22971pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22972 pm_serialize_header(buffer);
22973 pm_serialize_content(parser, node, buffer);
22974 pm_buffer_append_byte(buffer, '\0');
22975}
22976
22982pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22983 pm_options_t options = { 0 };
22984 pm_options_read(&options, data);
22985
22986 pm_parser_t parser;
22987 pm_parser_init(&parser, source, size, &options);
22988
22989 pm_node_t *node = pm_parse(&parser);
22990
22991 pm_serialize_header(buffer);
22992 pm_serialize_content(&parser, node, buffer);
22993 pm_buffer_append_byte(buffer, '\0');
22994
22995 pm_node_destroy(&parser, node);
22996 pm_parser_free(&parser);
22997 pm_options_free(&options);
22998}
22999
23005pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
23006 pm_parser_t parser;
23007 pm_options_t options = { 0 };
23008 pm_options_read(&options, data);
23009
23010 pm_buffer_t parser_buffer;
23011 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
23012 pm_serialize_header(buffer);
23013 pm_serialize_content(&parser, node, buffer);
23014 pm_buffer_append_byte(buffer, '\0');
23015
23016 pm_node_destroy(&parser, node);
23017 pm_buffer_free(&parser_buffer);
23018 pm_parser_free(&parser);
23019 pm_options_free(&options);
23020}
23021
23026pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
23027 pm_options_t options = { 0 };
23028 pm_options_read(&options, data);
23029
23030 pm_parser_t parser;
23031 pm_parser_init(&parser, source, size, &options);
23032
23033 pm_node_t *node = pm_parse(&parser);
23034 pm_serialize_header(buffer);
23035 pm_serialize_encoding(parser.encoding, buffer);
23036 pm_buffer_append_varsint(buffer, parser.start_line);
23037 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
23038
23039 pm_node_destroy(&parser, node);
23040 pm_parser_free(&parser);
23041 pm_options_free(&options);
23042}
23043
23044#endif
23045
23046/******************************************************************************/
23047/* Slice queries for the Ruby API */
23048/******************************************************************************/
23049
/** The category that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the named encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier-like name. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
23067
23071pm_slice_type_t
23072pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
23073 // first, get the right encoding object
23074 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
23075 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
23076
23077 // check that there is at least one character
23078 if (length == 0) return PM_SLICE_TYPE_NONE;
23079
23080 size_t width;
23081 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
23082 // valid because alphabetical
23083 } else if (*source == '_') {
23084 // valid because underscore
23085 width = 1;
23086 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
23087 // valid because multibyte
23088 } else {
23089 // invalid because no match
23090 return PM_SLICE_TYPE_NONE;
23091 }
23092
23093 // determine the type of the slice based on the first character
23094 const uint8_t *end = source + length;
23095 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
23096
23097 // next, iterate through all of the bytes of the string to ensure that they
23098 // are all valid identifier characters
23099 source += width;
23100
23101 while (source < end) {
23102 if ((width = encoding->alnum_char(source, end - source)) != 0) {
23103 // valid because alphanumeric
23104 source += width;
23105 } else if (*source == '_') {
23106 // valid because underscore
23107 source++;
23108 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
23109 // valid because multibyte
23110 source += width;
23111 } else {
23112 // invalid because no match
23113 break;
23114 }
23115 }
23116
23117 // accept a ! or ? at the end of the slice as a method name
23118 if (*source == '!' || *source == '?' || *source == '=') {
23119 source++;
23120 result = PM_SLICE_TYPE_METHOD_NAME;
23121 }
23122
23123 // valid if we are at the end of the slice
23124 return source == end ? result : PM_SLICE_TYPE_NONE;
23125}
23126
23131pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
23132 switch (pm_slice_type(source, length, encoding_name)) {
23133 case PM_SLICE_TYPE_ERROR:
23134 return PM_STRING_QUERY_ERROR;
23135 case PM_SLICE_TYPE_NONE:
23136 case PM_SLICE_TYPE_CONSTANT:
23137 case PM_SLICE_TYPE_METHOD_NAME:
23138 return PM_STRING_QUERY_FALSE;
23139 case PM_SLICE_TYPE_LOCAL:
23140 return PM_STRING_QUERY_TRUE;
23141 }
23142
23143 assert(false && "unreachable");
23144 return PM_STRING_QUERY_FALSE;
23145}
23146
23151pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
23152 switch (pm_slice_type(source, length, encoding_name)) {
23153 case PM_SLICE_TYPE_ERROR:
23154 return PM_STRING_QUERY_ERROR;
23155 case PM_SLICE_TYPE_NONE:
23156 case PM_SLICE_TYPE_LOCAL:
23157 case PM_SLICE_TYPE_METHOD_NAME:
23158 return PM_STRING_QUERY_FALSE;
23159 case PM_SLICE_TYPE_CONSTANT:
23160 return PM_STRING_QUERY_TRUE;
23161 }
23162
23163 assert(false && "unreachable");
23164 return PM_STRING_QUERY_FALSE;
23165}
23166
23171pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
23172#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
23173#define C1(c) (*source == c)
23174#define C2(s) (memcmp(source, s, 2) == 0)
23175#define C3(s) (memcmp(source, s, 3) == 0)
23176
23177 switch (pm_slice_type(source, length, encoding_name)) {
23178 case PM_SLICE_TYPE_ERROR:
23179 return PM_STRING_QUERY_ERROR;
23180 case PM_SLICE_TYPE_NONE:
23181 break;
23182 case PM_SLICE_TYPE_LOCAL:
23183 // numbered parameters are not valid method names
23184 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
23185 case PM_SLICE_TYPE_CONSTANT:
23186 // all constants are valid method names
23187 case PM_SLICE_TYPE_METHOD_NAME:
23188 // all method names are valid method names
23189 return PM_STRING_QUERY_TRUE;
23190 }
23191
23192 switch (length) {
23193 case 1:
23194 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
23195 case 2:
23196 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
23197 case 3:
23198 return B(C3("===") || C3("<=>") || C3("[]="));
23199 default:
23200 return PM_STRING_QUERY_FALSE;
23201 }
23202
23203#undef B
23204#undef C1
23205#undef C2
23206#undef C3
23207}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:31
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:213
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:219
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:253
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:237
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2141
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2118
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2048
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:364
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:18009
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:18011
const uint8_t * start
The start of the regular expression.
Definition prism.c:18014
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:18025
const uint8_t * end
The end of the regular expression.
Definition prism.c:18017
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20914
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20925
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20916
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20922
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20919
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20932
AndNode.
Definition ast.h:1262
struct pm_node * left
AndNode::left.
Definition ast.h:1278
struct pm_node * right
AndNode::right.
Definition ast.h:1291
ArgumentsNode.
Definition ast.h:1323
pm_node_t base
The embedded base node.
Definition ast.h:1325
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1336
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
ArrayNode.
Definition ast.h:1354
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1364
ArrayPatternNode.
Definition ast.h:1415
struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1423
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1463
pm_node_t base
The embedded base node.
Definition ast.h:1417
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1473
AssocNode.
Definition ast.h:1488
struct pm_node * value
AssocNode::value.
Definition ast.h:1520
struct pm_node * key
AssocNode::key.
Definition ast.h:1507
BeginNode.
Definition ast.h:1614
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1667
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1647
struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1637
pm_node_t base
The embedded base node.
Definition ast.h:1616
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1657
This struct represents a set of binding powers used for a given token.
Definition prism.c:12963
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12971
pm_binding_power_t left
The left binding power.
Definition prism.c:12965
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12977
pm_binding_power_t right
The right binding power.
Definition prism.c:12968
BlockLocalVariableNode.
Definition ast.h:1733
BlockNode.
Definition ast.h:1761
BlockParameterNode.
Definition ast.h:1837
BlockParametersNode.
Definition ast.h:1891
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2118
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2179
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2199
struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2137
pm_constant_id_t name
CallNode::name.
Definition ast.h:2160
pm_node_t base
The embedded base node.
Definition ast.h:2120
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2150
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2170
struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2189
struct pm_node * block
CallNode::block.
Definition ast.h:2209
CaseMatchNode.
Definition ast.h:2544
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2567
CaseNode.
Definition ast.h:2614
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2637
ClassVariableReadNode.
Definition ast.h:2909
ClassVariableTargetNode.
Definition ast.h:2938
ClassVariableWriteNode.
Definition ast.h:2961
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:466
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition ast.h:3175
ConstantPathTargetNode.
Definition ast.h:3313
ConstantReadNode.
Definition ast.h:3408
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3437
ConstantWriteNode.
Definition ast.h:3460
This is a node in a linked list of contexts.
Definition parser.h:439
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:363
ElseNode.
Definition ast.h:3639
struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3652
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3737
struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3750
FindPatternNode.
Definition ast.h:3794
struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3802
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3822
pm_node_t base
The embedded base node.
Definition ast.h:3796
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3827
FlipFlopNode.
Definition ast.h:3845
FloatNode.
Definition ast.h:3878
double value
FloatNode::value.
Definition ast.h:3888
pm_node_t base
The embedded base node.
Definition ast.h:3880
ForwardingParameterNode.
Definition ast.h:4014
GlobalVariableReadNode.
Definition ast.h:4174
GlobalVariableTargetNode.
Definition ast.h:4203
GlobalVariableWriteNode.
Definition ast.h:4226
HashNode.
Definition ast.h:4288
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4314
HashPatternNode.
Definition ast.h:4342
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4365
pm_node_t base
The embedded base node.
Definition ast.h:4344
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4370
struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4350
All of the information necessary to store to lexing a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4391
struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4451
struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4470
ImaginaryNode.
Definition ast.h:4497
InstanceVariableReadNode.
Definition ast.h:4987
InstanceVariableTargetNode.
Definition ast.h:5016
InstanceVariableWriteNode.
Definition ast.h:5039
IntegerNode.
Definition ast.h:5107
pm_integer_t value
IntegerNode::value.
Definition ast.h:5117
pm_node_t base
The embedded base node.
Definition ast.h:5109
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5145
InterpolatedRegularExpressionNode.
Definition ast.h:5191
InterpolatedStringNode.
Definition ast.h:5228
pm_node_t base
The embedded base node.
Definition ast.h:5230
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5236
InterpolatedSymbolNode.
Definition ast.h:5261
pm_node_t base
The embedded base node.
Definition ast.h:5263
InterpolatedXStringNode.
Definition ast.h:5294
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5302
pm_node_t base
The embedded base node.
Definition ast.h:5296
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5307
KeywordHashNode.
Definition ast.h:5366
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
enum pm_lex_mode::@96 mode
The type of this lex mode.
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
union pm_lex_mode::@97 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
LocalVariableReadNode.
Definition ast.h:5608
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5639
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5626
LocalVariableTargetNode.
Definition ast.h:5654
LocalVariableWriteNode.
Definition ast.h:5682
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5709
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5696
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:544
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:546
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:549
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
MatchLastLineNode.
Definition ast.h:5774
MatchWriteNode.
Definition ast.h:5878
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5891
MissingNode.
Definition ast.h:5903
MultiTargetNode.
Definition ast.h:5974
pm_node_t base
The embedded base node.
Definition ast.h:5976
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6032
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:5992
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6042
MultiWriteNode.
Definition ast.h:6057
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition ast.h:557
size_t size
The number of nodes in the list.
Definition ast.h:559
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:565
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1068
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1073
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1079
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1091
OptionalParameterNode.
Definition ast.h:6330
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:98
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:147
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:109
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:163
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:170
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:124
int32_t line
The line within the file that the parse starts on.
Definition options.h:118
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:103
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:156
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:180
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:129
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:112
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:144
OrNode.
Definition ast.h:6368
struct pm_node * left
OrNode::left.
Definition ast.h:6384
struct pm_node * right
OrNode::right.
Definition ast.h:6397
ParametersNode.
Definition ast.h:6423
struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6441
struct pm_block_parameter_node * block
ParametersNode::block.
Definition ast.h:6461
pm_node_t base
The embedded base node.
Definition ast.h:6425
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6456
ParenthesesNode.
Definition ast.h:6479
struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6487
This struct represents the overall parser.
Definition parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
struct pm_parser::@102 lex_modes
A stack of lex modes.
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:646
RangeNode.
Definition ast.h:6685
struct pm_node * right
RangeNode::right.
Definition ast.h:6715
struct pm_node * left
RangeNode::left.
Definition ast.h:6701
RationalNode.
Definition ast.h:6743
pm_node_t base
The embedded base node.
Definition ast.h:6745
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6755
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10389
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10394
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10391
RegularExpressionNode.
Definition ast.h:6810
pm_node_t base
The embedded base node.
Definition ast.h:6812
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6833
RequiredParameterNode.
Definition ast.h:6884
RescueModifierNode.
Definition ast.h:6907
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:6925
RescueNode.
Definition ast.h:6945
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:6983
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:6973
pm_node_t base
The embedded base node.
Definition ast.h:6947
This struct represents a node in a linked list of scopes.
Definition parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
SplatNode.
Definition ast.h:7245
struct pm_node * expression
SplatNode::expression.
Definition ast.h:7258
StatementsNode.
Definition ast.h:7273
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7281
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7308
pm_node_t base
The embedded base node.
Definition ast.h:7310
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7331
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7326
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7316
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@103 type
The type of the string.
SymbolNode.
Definition ast.h:7400
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7413
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7423
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10363
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10368
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10374
This struct represents a token in the Ruby source.
Definition ast.h:529
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:537
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:534
pm_token_type_t type
The type of the token.
Definition ast.h:531
UndefNode.
Definition ast.h:7456
UnlessNode.
Definition ast.h:7487
struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7537
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7547
WhenNode.
Definition ast.h:7623
XStringNode.
Definition ast.h:7714