Ruby 4.1.0dev (2026-03-16 revision 2fc3013e7a06cb667a0aa31df9e6f6e0f89de4be)
prism.c
1#include "prism.h"
2#include "prism/node_new.h"
3
7const char *
8pm_version(void) {
9 return PRISM_VERSION;
10}
11
/**
 * NOTE(review): presumably the number of columns a tab character counts for
 * when measuring whitespace; its uses are outside this chunk -- confirm.
 */
#define PM_TAB_WHITESPACE_SIZE 8

/**
 * Smaller / larger of two values. These are function-like macros, so each
 * operand is evaluated twice: never pass expressions with side effects.
 */
#define MIN(lhs_, rhs_) (((lhs_) < (rhs_)) ? (lhs_) : (rhs_))
#define MAX(lhs_, rhs_) (((lhs_) > (rhs_)) ? (lhs_) : (rhs_))
21
22/******************************************************************************/
23/* Helpful AST-related macros */
24/******************************************************************************/
25
/** Convert the value of an expression to uint32_t. */
#define U32(value_) ((uint32_t) (value_))

/** Shorthand aliases for PM_NODE_FLAGS and PM_NODE_UPCAST. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

/** Start offset and one-past-the-end offset of a location (taken by pointer). */
#define PM_LOCATION_START(location_) ((location_)->start)
#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)

/**
 * Token measurements. A token's start/end are pointers, so the START/END
 * forms subtract the parser's start pointer to produce uint32_t offsets.
 * PM_TOKENS_LENGTH spans from the start of `left_` to the end of `right_`.
 */
#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)

/**
 * Node measurements, read from the location of the upcast node.
 * PM_NODES_LENGTH spans from the start of `left_` to the end of `right_`.
 */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))

/** Lengths of spans that begin at a token and end at a node, or vice versa. */
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))

/**
 * In-place updates of a node's location. The START setters overwrite the
 * start offset; the LENGTH setters recompute the length so that the node ends
 * where the given node/token/location ends (relative to the node's own start).
 */
#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))

/**
 * Builders for pm_location_t values: from an explicit start/length pair, the
 * zeroed "unset" location, a single token, or a copy of a node's location.
 */
#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location

/** Builders for locations spanning from one token/node to another. */
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

/**
 * Short conversion helpers:
 * - TOK2LOC: location of a token (pointer).
 * - NTOK2LOC: like TOK2LOC, but a NULL token pointer yields the unset location.
 * - NTOK2PTR: address of a token lvalue, or NULL when its start pointer is NULL.
 */
#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
66
67/******************************************************************************/
68/* Lex mode manipulations */
69/******************************************************************************/
70
/**
 * Return the byte that increases nesting for a literal opened with `start`.
 * The four bracket openers ( [ { < nest on themselves; every other delimiter
 * has no incrementor, which is reported as '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
87
/**
 * Return the byte that closes a literal opened with `start`. Bracket openers
 * map to their matching closer; any other delimiter terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
107
113static bool
114lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
115 lex_mode.prev = parser->lex_modes.current;
116 parser->lex_modes.index++;
117
118 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
119 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
120 if (parser->lex_modes.current == NULL) return false;
121
122 *parser->lex_modes.current = lex_mode;
123 } else {
124 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
125 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
126 }
127
128 return true;
129}
130
134static inline bool
135lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
136 uint8_t incrementor = lex_mode_incrementor(delimiter);
137 uint8_t terminator = lex_mode_terminator(delimiter);
138
139 pm_lex_mode_t lex_mode = {
140 .mode = PM_LEX_LIST,
141 .as.list = {
142 .nesting = 0,
143 .interpolation = interpolation,
144 .incrementor = incrementor,
145 .terminator = terminator
146 }
147 };
148
149 // These are the places where we need to split up the content of the list.
150 // We'll use strpbrk to find the first of these characters.
151 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
152 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
153 size_t index = 7;
154
155 // Now we'll add the terminator to the list of breakpoints. If the
156 // terminator is not already a NULL byte, add it to the list.
157 if (terminator != '\0') {
158 breakpoints[index++] = terminator;
159 }
160
161 // If interpolation is allowed, then we're going to check for the #
162 // character. Otherwise we'll only look for escapes and the terminator.
163 if (interpolation) {
164 breakpoints[index++] = '#';
165 }
166
167 // If there is an incrementor, then we'll check for that as well.
168 if (incrementor != '\0') {
169 breakpoints[index++] = incrementor;
170 }
171
172 parser->explicit_encoding = NULL;
173 return lex_mode_push(parser, lex_mode);
174}
175
181static inline bool
182lex_mode_push_list_eof(pm_parser_t *parser) {
183 return lex_mode_push_list(parser, false, '\0');
184}
185
189static inline bool
190lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
191 pm_lex_mode_t lex_mode = {
192 .mode = PM_LEX_REGEXP,
193 .as.regexp = {
194 .nesting = 0,
195 .incrementor = incrementor,
196 .terminator = terminator
197 }
198 };
199
200 // These are the places where we need to split up the content of the
201 // regular expression. We'll use strpbrk to find the first of these
202 // characters.
203 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
204 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
205 size_t index = 4;
206
207 // First we'll add the terminator.
208 if (terminator != '\0') {
209 breakpoints[index++] = terminator;
210 }
211
212 // Next, if there is an incrementor, then we'll check for that as well.
213 if (incrementor != '\0') {
214 breakpoints[index++] = incrementor;
215 }
216
217 parser->explicit_encoding = NULL;
218 return lex_mode_push(parser, lex_mode);
219}
220
224static inline bool
225lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
226 pm_lex_mode_t lex_mode = {
227 .mode = PM_LEX_STRING,
228 .as.string = {
229 .nesting = 0,
230 .interpolation = interpolation,
231 .label_allowed = label_allowed,
232 .incrementor = incrementor,
233 .terminator = terminator
234 }
235 };
236
237 // These are the places where we need to split up the content of the
238 // string. We'll use strpbrk to find the first of these characters.
239 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
240 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
241 size_t index = 3;
242
243 // Now add in the terminator. If the terminator is not already a NULL byte,
244 // then we'll add it.
245 if (terminator != '\0') {
246 breakpoints[index++] = terminator;
247 }
248
249 // If interpolation is allowed, then we're going to check for the #
250 // character. Otherwise we'll only look for escapes and the terminator.
251 if (interpolation) {
252 breakpoints[index++] = '#';
253 }
254
255 // If we have an incrementor, then we'll add that in as a breakpoint as
256 // well.
257 if (incrementor != '\0') {
258 breakpoints[index++] = incrementor;
259 }
260
261 parser->explicit_encoding = NULL;
262 return lex_mode_push(parser, lex_mode);
263}
264
270static inline bool
271lex_mode_push_string_eof(pm_parser_t *parser) {
272 return lex_mode_push_string(parser, false, false, '\0', '\0');
273}
274
280static void
281lex_mode_pop(pm_parser_t *parser) {
282 if (parser->lex_modes.index == 0) {
283 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
284 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
285 parser->lex_modes.index--;
286 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
287 } else {
288 parser->lex_modes.index--;
289 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
290 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
291 parser->lex_modes.current = prev;
292 }
293}
294
298static inline bool
299lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
300 return parser->lex_state & state;
301}
302
/**
 * Result of lex_state_ignored_p: whether the current lex state ignores no
 * newline, every newline, or only the "pattern" case.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
308
309static inline pm_ignored_newline_type_t
310lex_state_ignored_p(pm_parser_t *parser) {
311 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
312
313 if (ignored) {
314 return PM_IGNORED_NEWLINE_ALL;
315 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
316 return PM_IGNORED_NEWLINE_PATTERN;
317 } else {
318 return PM_IGNORED_NEWLINE_NONE;
319 }
320}
321
322static inline bool
323lex_state_beg_p(pm_parser_t *parser) {
324 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
325}
326
327static inline bool
328lex_state_arg_p(pm_parser_t *parser) {
329 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
330}
331
332static inline bool
333lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
334 if (parser->current.end >= parser->end) {
335 return false;
336 }
337 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
338}
339
340static inline bool
341lex_state_end_p(pm_parser_t *parser) {
342 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
343}
344
348static inline bool
349lex_state_operator_p(pm_parser_t *parser) {
350 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
351}
352
357static inline void
358lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
359 parser->lex_state = state;
360}
361
/**
 * Lex-state debug logging is off unless the build defines PM_DEBUG_LOGGING
 * to a non-zero value.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
369
370#if PM_DEBUG_LOGGING
371PRISM_ATTRIBUTE_UNUSED static void
372debug_state(pm_parser_t *parser) {
373 fprintf(stderr, "STATE: ");
374 bool first = true;
375
376 if (parser->lex_state == PM_LEX_STATE_NONE) {
377 fprintf(stderr, "NONE\n");
378 return;
379 }
380
381#define CHECK_STATE(state) \
382 if (parser->lex_state & state) { \
383 if (!first) fprintf(stderr, "|"); \
384 fprintf(stderr, "%s", #state); \
385 first = false; \
386 }
387
388 CHECK_STATE(PM_LEX_STATE_BEG)
389 CHECK_STATE(PM_LEX_STATE_END)
390 CHECK_STATE(PM_LEX_STATE_ENDARG)
391 CHECK_STATE(PM_LEX_STATE_ENDFN)
392 CHECK_STATE(PM_LEX_STATE_ARG)
393 CHECK_STATE(PM_LEX_STATE_CMDARG)
394 CHECK_STATE(PM_LEX_STATE_MID)
395 CHECK_STATE(PM_LEX_STATE_FNAME)
396 CHECK_STATE(PM_LEX_STATE_DOT)
397 CHECK_STATE(PM_LEX_STATE_CLASS)
398 CHECK_STATE(PM_LEX_STATE_LABEL)
399 CHECK_STATE(PM_LEX_STATE_LABELED)
400 CHECK_STATE(PM_LEX_STATE_FITEM)
401
402#undef CHECK_STATE
403
404 fprintf(stderr, "\n");
405}
406
407static void
408debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
409 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
410 debug_state(parser);
411 lex_state_set(parser, state);
412 fprintf(stderr, "Now: ");
413 debug_state(parser);
414 fprintf(stderr, "\n");
415}
416
/**
 * From here on, route every lex_state_set call through the logging wrapper,
 * passing along the calling function and line.
 */
#define lex_state_set(parser_, state_) debug_lex_state_set(parser_, state_, __func__, __LINE__)
418#endif
419
420/******************************************************************************/
421/* Command-line macro helpers */
422/******************************************************************************/
423
/**
 * Mask the parser's command_line field with the given option bit(s). The
 * result is non-zero when at least one of them is set.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
444
445/******************************************************************************/
446/* Diagnostic-related functions */
447/******************************************************************************/
448
452static inline void
453pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
454 pm_diagnostic_list_append(&parser->error_list, start, length, diag_id);
455}
456
461static inline void
462pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
463 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
464}
465
470static inline void
471pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
472 pm_parser_err_token(parser, &parser->current, diag_id);
473}
474
479static inline void
480pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
481 pm_parser_err_token(parser, &parser->previous, diag_id);
482}
483
488static inline void
489pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
490 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
491}
492
/**
 * Append a formatted error to the parser's error list. The variadic
 * arguments are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format( \
        &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted error whose location is the location of the given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
505
/**
 * Append a formatted error located at the given node, passing the node's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte. (Presumably consumed by a length-limited string
 * directive in the diagnostic's message template, which is not visible here.)
 *
 * The parser argument is parenthesized before `->` so that any pointer
 * expression can be passed safely, matching the sibling macros.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
512
/**
 * Append a formatted error whose location is the span of the given token.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)

/**
 * Append a formatted error located at the given token, passing the token's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
526
530static inline void
531pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
532 pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id);
533}
534
539static inline void
540pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
541 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
542}
543
548static inline void
549pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
550 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
551}
552
/**
 * Append a formatted warning to the parser's warning list. The variadic
 * arguments are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format( \
        &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning whose location is the span of the given token.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning located at the given token, passing the token's
 * source text as the format arguments: its length as an int followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)

/**
 * Append a formatted warning whose location is the location of the given node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
580
586static void
587pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
588 PM_PARSER_ERR_FORMAT(
589 parser,
590 U32(ident_start - parser->start),
591 U32(ident_length),
592 PM_ERR_HEREDOC_TERM,
593 (int) ident_length,
594 (const char *) ident_start
595 );
596}
597
598/******************************************************************************/
599/* Scope-related functions */
600/******************************************************************************/
601
605static bool
606pm_parser_scope_push(pm_parser_t *parser, bool closed) {
607 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
608 if (scope == NULL) return false;
609
610 *scope = (pm_scope_t) {
611 .previous = parser->current_scope,
612 .locals = { 0 },
613 .parameters = PM_SCOPE_PARAMETERS_NONE,
614 .implicit_parameters = { 0 },
615 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
616 .closed = closed
617 };
618
619 parser->current_scope = scope;
620 return true;
621}
622
627static bool
628pm_parser_scope_toplevel_p(pm_parser_t *parser) {
629 pm_scope_t *scope = parser->current_scope;
630
631 do {
632 if (scope->previous == NULL) return true;
633 if (scope->closed) return false;
634 } while ((scope = scope->previous) != NULL);
635
636 assert(false && "unreachable");
637 return true;
638}
639
643static pm_scope_t *
644pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
645 pm_scope_t *scope = parser->current_scope;
646
647 while (depth-- > 0) {
648 assert(scope != NULL);
649 scope = scope->previous;
650 }
651
652 return scope;
653}
654
/**
 * Outcome of pm_parser_scope_forwarding_param_check:
 * - PASS: the first scope carrying the parameter is a closed scope.
 * - CONFLICT: a closed scope carries it, but so does a non-closed scope
 *   nested inside it.
 * - FAIL: no closed scope carrying the parameter was found.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
660
661static pm_scope_forwarding_param_check_result_t
662pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
663 pm_scope_t *scope = parser->current_scope;
664 bool conflict = false;
665
666 while (scope != NULL) {
667 if (scope->parameters & mask) {
668 if (scope->closed) {
669 if (conflict) {
670 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
671 } else {
672 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
673 }
674 }
675
676 conflict = true;
677 }
678
679 if (scope->closed) break;
680 scope = scope->previous;
681 }
682
683 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
684}
685
686static void
687pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
688 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
689 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
690 // Pass.
691 break;
692 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
693 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
694 break;
695 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
696 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
697 break;
698 }
699}
700
701static void
702pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
703 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
704 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
705 // Pass.
706 break;
707 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
708 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
709 break;
710 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
711 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
712 break;
713 }
714}
715
716static void
717pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
718 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
719 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
720 // Pass.
721 break;
722 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
723 // This shouldn't happen, because ... is not allowed in the
724 // declaration of blocks. If we get here, we assume we already have
725 // an error for this.
726 break;
727 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
728 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
729 break;
730 }
731}
732
733static void
734pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
735 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
736 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
737 // Pass.
738 break;
739 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
740 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
741 break;
742 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
743 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
744 break;
745 }
746}
747
752pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
753 return parser->current_scope->shareable_constant;
754}
755
760static void
761pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
762 pm_scope_t *scope = parser->current_scope;
763
764 do {
765 scope->shareable_constant = shareable_constant;
766 } while (!scope->closed && (scope = scope->previous) != NULL);
767}
768
769/******************************************************************************/
770/* Local variable-related functions */
771/******************************************************************************/
772
/**
 * Capacity at which a locals set switches representation: below this value
 * it is searched as a plain list, at or above it as a hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
777
778static void
779pm_locals_free(pm_locals_t *locals) {
780 if (locals->capacity > 0) {
781 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
782 }
783}
784
789static uint32_t
790pm_locals_hash(pm_constant_id_t name) {
791 name = ((name >> 16) ^ name) * 0x45d9f3b;
792 name = ((name >> 16) ^ name) * 0x45d9f3b;
793 name = (name >> 16) ^ name;
794 return name;
795}
796
801static void
802pm_locals_resize(pm_locals_t *locals) {
803 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
804 assert(next_capacity > locals->capacity);
805
806 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
807 if (next_locals == NULL) abort();
808
809 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
810 if (locals->size > 0) {
811 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
812 }
813 } else {
814 // If we just switched from a list to a hash, then we need to fill in
815 // the hash values of all of the locals.
816 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
817 uint32_t mask = next_capacity - 1;
818
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name != PM_CONSTANT_ID_UNSET) {
823 if (hash_needed) local->hash = pm_locals_hash(local->name);
824
825 uint32_t hash = local->hash;
826 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
827 next_locals[hash & mask] = *local;
828 }
829 }
830 }
831
832 pm_locals_free(locals);
833 locals->locals = next_locals;
834 locals->capacity = next_capacity;
835}
836
852static bool
853pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
854 if (locals->size >= (locals->capacity / 4 * 3)) {
855 pm_locals_resize(locals);
856 }
857
858 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
859 for (uint32_t index = 0; index < locals->capacity; index++) {
860 pm_local_t *local = &locals->locals[index];
861
862 if (local->name == PM_CONSTANT_ID_UNSET) {
863 *local = (pm_local_t) {
864 .name = name,
865 .location = { .start = start, .length = length },
866 .index = locals->size++,
867 .reads = reads,
868 .hash = 0
869 };
870 return true;
871 } else if (local->name == name) {
872 return false;
873 }
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 *local = (pm_local_t) {
885 .name = name,
886 .location = { .start = start, .length = length },
887 .index = locals->size++,
888 .reads = reads,
889 .hash = initial_hash
890 };
891 return true;
892 } else if (local->name == name) {
893 return false;
894 } else {
895 hash++;
896 }
897 } while ((hash & mask) != initial_hash);
898 }
899
900 assert(false && "unreachable");
901 return true;
902}
903
908static uint32_t
909pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
910 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
911 for (uint32_t index = 0; index < locals->size; index++) {
912 pm_local_t *local = &locals->locals[index];
913 if (local->name == name) return index;
914 }
915 } else {
916 uint32_t mask = locals->capacity - 1;
917 uint32_t hash = pm_locals_hash(name);
918 uint32_t initial_hash = hash & mask;
919
920 do {
921 pm_local_t *local = &locals->locals[hash & mask];
922
923 if (local->name == PM_CONSTANT_ID_UNSET) {
924 return UINT32_MAX;
925 } else if (local->name == name) {
926 return hash & mask;
927 } else {
928 hash++;
929 }
930 } while ((hash & mask) != initial_hash);
931 }
932
933 return UINT32_MAX;
934}
935
940static void
941pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
942 uint32_t index = pm_locals_find(locals, name);
943 assert(index != UINT32_MAX);
944
945 pm_local_t *local = &locals->locals[index];
946 assert(local->reads < UINT32_MAX);
947
948 local->reads++;
949}
950
955static void
956pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
957 uint32_t index = pm_locals_find(locals, name);
958 assert(index != UINT32_MAX);
959
960 pm_local_t *local = &locals->locals[index];
961 assert(local->reads > 0);
962
963 local->reads--;
964}
965
969static uint32_t
970pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
971 uint32_t index = pm_locals_find(locals, name);
972 assert(index != UINT32_MAX);
973
974 return locals->locals[index].reads;
975}
976
985static void
986pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
987 pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
988
989 // If we're still below the threshold for switching to a hash, then we only
990 // need to loop over the locals until we hit the size because the locals are
991 // stored in a list.
992 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
993
994 // We will only warn for unused variables if we're not at the top level, or
995 // if we're parsing a file outside of eval or -e.
996 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
997
998 for (uint32_t index = 0; index < capacity; index++) {
999 pm_local_t *local = &locals->locals[index];
1000
1001 if (local->name != PM_CONSTANT_ID_UNSET) {
1002 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1003
1004 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1005 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1006
1007 if (constant->length >= 1 && *constant->start != '_') {
1008 PM_PARSER_WARN_FORMAT(
1009 parser,
1010 local->location.start,
1011 local->location.length,
1012 PM_WARN_UNUSED_LOCAL_VARIABLE,
1013 (int) constant->length,
1014 (const char *) constant->start
1015 );
1016 }
1017 }
1018 }
1019 }
1020}
1021
1022/******************************************************************************/
1023/* Node-related functions */
1024/******************************************************************************/
1025
1029static inline pm_constant_id_t
1030pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1031 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1032}
1033
1037static inline pm_constant_id_t
1038pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1039 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1040}
1041
1045static inline pm_constant_id_t
1046pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1047 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1048}
1049
1053static inline pm_constant_id_t
1054pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1055 return pm_parser_constant_id_raw(parser, token->start, token->end);
1056}
1057
/**
 * Case-label list for the node types treated as void value expressions.
 * Written to follow a `case` keyword and to be followed by a colon:
 * `case PM_CASE_VOID_VALUE:`.
 */
#define PM_CASE_VOID_VALUE \
    PM_RETURN_NODE: \
    case PM_BREAK_NODE: \
    case PM_NEXT_NODE: \
    case PM_REDO_NODE: \
    case PM_RETRY_NODE: \
    case PM_MATCH_REQUIRED_NODE
1064
1070static pm_node_t *
1071pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1072 pm_node_t *void_node = NULL;
1073
1074 while (node != NULL) {
1075 switch (PM_NODE_TYPE(node)) {
1076 case PM_CASE_VOID_VALUE:
1077 return void_node != NULL ? void_node : node;
1078 case PM_MATCH_PREDICATE_NODE:
1079 return NULL;
1080 case PM_BEGIN_NODE: {
1081 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1082
1083 if (cast->ensure_clause != NULL) {
1084 if (cast->rescue_clause != NULL) {
1085 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1086 if (vn != NULL) return vn;
1087 }
1088
1089 if (cast->statements != NULL) {
1090 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1091 if (vn != NULL) return vn;
1092 }
1093
1094 node = UP(cast->ensure_clause);
1095 } else if (cast->rescue_clause != NULL) {
1096 // https://bugs.ruby-lang.org/issues/21669
1097 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1098 if (cast->statements == NULL) return NULL;
1099
1100 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1101 if (vn == NULL) return NULL;
1102 if (void_node == NULL) void_node = vn;
1103 }
1104
1105 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1106 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1107
1108 if (vn == NULL) {
1109 // https://bugs.ruby-lang.org/issues/21669
1110 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1111 return NULL;
1112 }
1113 void_node = NULL;
1114 break;
1115 }
1116 }
1117
1118 if (cast->else_clause != NULL) {
1119 node = UP(cast->else_clause);
1120
1121 // https://bugs.ruby-lang.org/issues/21669
1122 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1123 pm_node_t *vn = pm_check_value_expression(parser, node);
1124 if (vn != NULL) return vn;
1125 }
1126 } else {
1127 return void_node;
1128 }
1129 } else {
1130 node = UP(cast->statements);
1131 }
1132
1133 break;
1134 }
1135 case PM_CASE_NODE: {
1136 // https://bugs.ruby-lang.org/issues/21669
1137 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1138 return NULL;
1139 }
1140
1141 pm_case_node_t *cast = (pm_case_node_t *) node;
1142 if (cast->else_clause == NULL) return NULL;
1143
1144 pm_node_t *condition;
1145 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1146 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1147
1148 pm_when_node_t *cast = (pm_when_node_t *) condition;
1149 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1150 if (vn == NULL) return NULL;
1151 if (void_node == NULL) void_node = vn;
1152 }
1153
1154 node = UP(cast->else_clause);
1155 break;
1156 }
1157 case PM_CASE_MATCH_NODE: {
1158 // https://bugs.ruby-lang.org/issues/21669
1159 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1160 return NULL;
1161 }
1162
1164 if (cast->else_clause == NULL) return NULL;
1165
1166 pm_node_t *condition;
1167 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1168 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1169
1170 pm_in_node_t *cast = (pm_in_node_t *) condition;
1171 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1172 if (vn == NULL) return NULL;
1173 if (void_node == NULL) void_node = vn;
1174 }
1175
1176 node = UP(cast->else_clause);
1177 break;
1178 }
1179 case PM_ENSURE_NODE: {
1180 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1181 node = UP(cast->statements);
1182 break;
1183 }
1184 case PM_PARENTHESES_NODE: {
1186 node = UP(cast->body);
1187 break;
1188 }
1189 case PM_STATEMENTS_NODE: {
1191
1192 // https://bugs.ruby-lang.org/issues/21669
1193 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1194 pm_node_t *body_part;
1195 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1196 switch (PM_NODE_TYPE(body_part)) {
1197 case PM_CASE_VOID_VALUE:
1198 if (void_node == NULL) {
1199 void_node = body_part;
1200 }
1201 return void_node;
1202 default: break;
1203 }
1204 }
1205 }
1206
1207 node = cast->body.nodes[cast->body.size - 1];
1208 break;
1209 }
1210 case PM_IF_NODE: {
1211 pm_if_node_t *cast = (pm_if_node_t *) node;
1212 if (cast->statements == NULL || cast->subsequent == NULL) {
1213 return NULL;
1214 }
1215 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1216 if (vn == NULL) {
1217 return NULL;
1218 }
1219 if (void_node == NULL) {
1220 void_node = vn;
1221 }
1222 node = cast->subsequent;
1223 break;
1224 }
1225 case PM_UNLESS_NODE: {
1226 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1227 if (cast->statements == NULL || cast->else_clause == NULL) {
1228 return NULL;
1229 }
1230 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1231 if (vn == NULL) {
1232 return NULL;
1233 }
1234 if (void_node == NULL) {
1235 void_node = vn;
1236 }
1237 node = UP(cast->else_clause);
1238 break;
1239 }
1240 case PM_ELSE_NODE: {
1241 pm_else_node_t *cast = (pm_else_node_t *) node;
1242 node = UP(cast->statements);
1243 break;
1244 }
1245 case PM_AND_NODE: {
1246 pm_and_node_t *cast = (pm_and_node_t *) node;
1247 node = cast->left;
1248 break;
1249 }
1250 case PM_OR_NODE: {
1251 pm_or_node_t *cast = (pm_or_node_t *) node;
1252 node = cast->left;
1253 break;
1254 }
1255 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1257
1258 pm_scope_t *scope = parser->current_scope;
1259 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1260
1261 pm_locals_read(&scope->locals, cast->name);
1262 return NULL;
1263 }
1264 default:
1265 return NULL;
1266 }
1267 }
1268
1269 return NULL;
1270}
1271
1272static inline void
1273pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1274 pm_node_t *void_node = pm_check_value_expression(parser, node);
1275 if (void_node != NULL) {
1276 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1277 }
1278}
1279
1283static void
1284pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1285 const char *type = NULL;
1286 int length = 0;
1287
1288 switch (PM_NODE_TYPE(node)) {
1289 case PM_BACK_REFERENCE_READ_NODE:
1290 case PM_CLASS_VARIABLE_READ_NODE:
1291 case PM_GLOBAL_VARIABLE_READ_NODE:
1292 case PM_INSTANCE_VARIABLE_READ_NODE:
1293 case PM_LOCAL_VARIABLE_READ_NODE:
1294 case PM_NUMBERED_REFERENCE_READ_NODE:
1295 type = "a variable";
1296 length = 10;
1297 break;
1298 case PM_CALL_NODE: {
1299 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1300 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1301
1302 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1303 switch (message->length) {
1304 case 1:
1305 switch (message->start[0]) {
1306 case '+':
1307 case '-':
1308 case '*':
1309 case '/':
1310 case '%':
1311 case '|':
1312 case '^':
1313 case '&':
1314 case '>':
1315 case '<':
1316 type = (const char *) message->start;
1317 length = 1;
1318 break;
1319 }
1320 break;
1321 case 2:
1322 switch (message->start[1]) {
1323 case '=':
1324 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1325 type = (const char *) message->start;
1326 length = 2;
1327 }
1328 break;
1329 case '@':
1330 if (message->start[0] == '+' || message->start[0] == '-') {
1331 type = (const char *) message->start;
1332 length = 2;
1333 }
1334 break;
1335 case '*':
1336 if (message->start[0] == '*') {
1337 type = (const char *) message->start;
1338 length = 2;
1339 }
1340 break;
1341 }
1342 break;
1343 case 3:
1344 if (memcmp(message->start, "<=>", 3) == 0) {
1345 type = "<=>";
1346 length = 3;
1347 }
1348 break;
1349 }
1350
1351 break;
1352 }
1353 case PM_CONSTANT_PATH_NODE:
1354 type = "::";
1355 length = 2;
1356 break;
1357 case PM_CONSTANT_READ_NODE:
1358 type = "a constant";
1359 length = 10;
1360 break;
1361 case PM_DEFINED_NODE:
1362 type = "defined?";
1363 length = 8;
1364 break;
1365 case PM_FALSE_NODE:
1366 type = "false";
1367 length = 5;
1368 break;
1369 case PM_FLOAT_NODE:
1370 case PM_IMAGINARY_NODE:
1371 case PM_INTEGER_NODE:
1372 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1373 case PM_INTERPOLATED_STRING_NODE:
1374 case PM_RATIONAL_NODE:
1375 case PM_REGULAR_EXPRESSION_NODE:
1376 case PM_SOURCE_ENCODING_NODE:
1377 case PM_SOURCE_FILE_NODE:
1378 case PM_SOURCE_LINE_NODE:
1379 case PM_STRING_NODE:
1380 case PM_SYMBOL_NODE:
1381 type = "a literal";
1382 length = 9;
1383 break;
1384 case PM_NIL_NODE:
1385 type = "nil";
1386 length = 3;
1387 break;
1388 case PM_RANGE_NODE: {
1389 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1390
1391 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1392 type = "...";
1393 length = 3;
1394 } else {
1395 type = "..";
1396 length = 2;
1397 }
1398
1399 break;
1400 }
1401 case PM_SELF_NODE:
1402 type = "self";
1403 length = 4;
1404 break;
1405 case PM_TRUE_NODE:
1406 type = "true";
1407 length = 4;
1408 break;
1409 default:
1410 break;
1411 }
1412
1413 if (type != NULL) {
1414 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1415 }
1416}
1417
1422static void
1423pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1424 assert(node->body.size > 0);
1425 const size_t size = node->body.size - (last_value ? 1 : 0);
1426 for (size_t index = 0; index < size; index++) {
1427 pm_void_statement_check(parser, node->body.nodes[index]);
1428 }
1429}
1430
/**
 * The context in which a conditional predicate is being checked. It selects
 * the wording used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    /** Literal warnings describe the context as a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings describe the context as a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1441
1445static void
1446pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1447 switch (type) {
1448 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1449 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1450 break;
1451 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1452 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1453 break;
1454 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1455 break;
1456 }
1457}
1458
1463static bool
1464pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1465 switch (PM_NODE_TYPE(node)) {
1466 case PM_ARRAY_NODE: {
1467 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1468
1469 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1470 for (size_t index = 0; index < cast->elements.size; index++) {
1471 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1472 }
1473
1474 return true;
1475 }
1476 case PM_HASH_NODE: {
1477 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1478
1479 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1480 for (size_t index = 0; index < cast->elements.size; index++) {
1481 const pm_node_t *element = cast->elements.nodes[index];
1482 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1483
1484 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1485 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1486 }
1487
1488 return true;
1489 }
1490 case PM_FALSE_NODE:
1491 case PM_FLOAT_NODE:
1492 case PM_IMAGINARY_NODE:
1493 case PM_INTEGER_NODE:
1494 case PM_NIL_NODE:
1495 case PM_RATIONAL_NODE:
1496 case PM_REGULAR_EXPRESSION_NODE:
1497 case PM_SOURCE_ENCODING_NODE:
1498 case PM_SOURCE_FILE_NODE:
1499 case PM_SOURCE_LINE_NODE:
1500 case PM_STRING_NODE:
1501 case PM_SYMBOL_NODE:
1502 case PM_TRUE_NODE:
1503 return true;
1504 default:
1505 return false;
1506 }
1507}
1508
1513static inline void
1514pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1515 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1516 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1517 }
1518}
1519
1532static void
1533pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1534 switch (PM_NODE_TYPE(node)) {
1535 case PM_AND_NODE: {
1536 pm_and_node_t *cast = (pm_and_node_t *) node;
1537 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1538 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1539 break;
1540 }
1541 case PM_OR_NODE: {
1542 pm_or_node_t *cast = (pm_or_node_t *) node;
1543 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1544 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1545 break;
1546 }
1547 case PM_PARENTHESES_NODE: {
1549
1550 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1551 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1552 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1553 }
1554
1555 break;
1556 }
1557 case PM_BEGIN_NODE: {
1558 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1559 if (cast->statements != NULL) {
1560 pm_statements_node_t *statements = cast->statements;
1561 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1562 }
1563 break;
1564 }
1565 case PM_RANGE_NODE: {
1566 pm_range_node_t *cast = (pm_range_node_t *) node;
1567
1568 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1569 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1570
1571 // Here we change the range node into a flip flop node. We can do
1572 // this since the nodes are exactly the same except for the type.
1573 // We're only asserting against the size when we should probably
1574 // assert against the entire layout, but we'll assume tests will
1575 // catch this.
1576 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1577 node->type = PM_FLIP_FLOP_NODE;
1578
1579 break;
1580 }
1581 case PM_REGULAR_EXPRESSION_NODE:
1582 // Here we change the regular expression node into a match last line
1583 // node. We can do this since the nodes are exactly the same except
1584 // for the type.
1586 node->type = PM_MATCH_LAST_LINE_NODE;
1587
1588 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1589 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1590 }
1591
1592 break;
1593 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1594 // Here we change the interpolated regular expression node into an
1595 // interpolated match last line node. We can do this since the nodes
1596 // are exactly the same except for the type.
1598 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1599
1600 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1601 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1602 }
1603
1604 break;
1605 case PM_INTEGER_NODE:
1606 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1607 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1608 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1609 }
1610 } else {
1611 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1612 }
1613 break;
1614 case PM_STRING_NODE:
1615 case PM_SOURCE_FILE_NODE:
1616 case PM_INTERPOLATED_STRING_NODE:
1617 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1618 break;
1619 case PM_SYMBOL_NODE:
1620 case PM_INTERPOLATED_SYMBOL_NODE:
1621 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1622 break;
1623 case PM_SOURCE_LINE_NODE:
1624 case PM_SOURCE_ENCODING_NODE:
1625 case PM_FLOAT_NODE:
1626 case PM_RATIONAL_NODE:
1627 case PM_IMAGINARY_NODE:
1628 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1629 break;
1630 case PM_CLASS_VARIABLE_WRITE_NODE:
1631 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1632 break;
1633 case PM_CONSTANT_WRITE_NODE:
1634 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1635 break;
1636 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1637 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1638 break;
1639 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1640 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1641 break;
1642 case PM_LOCAL_VARIABLE_WRITE_NODE:
1643 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1644 break;
1645 case PM_MULTI_WRITE_NODE:
1646 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1647 break;
1648 default:
1649 break;
1650 }
1651}
1652
1675
1679static inline const pm_location_t *
1680pm_arguments_end(pm_arguments_t *arguments) {
1681 if (arguments->block != NULL) {
1682 uint32_t end = PM_NODE_END(arguments->block);
1683
1684 if (arguments->closing_loc.length > 0) {
1685 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1686 if (arguments_end > end) {
1687 return &arguments->closing_loc;
1688 }
1689 }
1690 return &arguments->block->location;
1691 }
1692 if (arguments->closing_loc.length > 0) {
1693 return &arguments->closing_loc;
1694 }
1695 if (arguments->arguments != NULL) {
1696 return &arguments->arguments->base.location;
1697 }
1698 if (arguments->opening_loc.length > 0) {
1699 return &arguments->opening_loc;
1700 }
1701 return NULL;
1702}
1703
1708static void
1709pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1710 // First, check that we have arguments and that we don't have a closing
1711 // location for them.
1712 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1713 return;
1714 }
1715
1716 // Next, check that we don't have a single parentheses argument. This would
1717 // look like:
1718 //
1719 // foo (1) {}
1720 //
1721 // In this case, it's actually okay for the block to be attached to the
1722 // call, even though it looks like it's attached to the argument.
1723 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1724 return;
1725 }
1726
1727 // If we didn't hit a case before this check, then at this point we need to
1728 // add a syntax error.
1729 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1730}
1731
1732/******************************************************************************/
1733/* Basic character checks */
1734/******************************************************************************/
1735
1742static inline size_t
1743char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1744 if (n <= 0) return 0;
1745
1746 if (parser->encoding_changed) {
1747 size_t width;
1748
1749 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1750 return width;
1751 } else if (*b == '_') {
1752 return 1;
1753 } else if (*b >= 0x80) {
1754 return parser->encoding->char_width(b, n);
1755 } else {
1756 return 0;
1757 }
1758 } else if (*b < 0x80) {
1759 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1760 } else {
1761 return pm_encoding_utf_8_char_width(b, n);
1762 }
1763}
1764
1769static inline size_t
1770char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1771 if (n <= 0) {
1772 return 0;
1773 } else if (*b < 0x80) {
1774 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1775 } else {
1776 return pm_encoding_utf_8_char_width(b, n);
1777 }
1778}
1779
1785static inline size_t
1786char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1787 if (n <= 0) {
1788 return 0;
1789 } else if (parser->encoding_changed) {
1790 size_t width;
1791
1792 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1793 return width;
1794 } else if (*b == '_') {
1795 return 1;
1796 } else if (*b >= 0x80) {
1797 return parser->encoding->char_width(b, n);
1798 } else {
1799 return 0;
1800 }
1801 } else {
1802 return char_is_identifier_utf8(b, n);
1803 }
1804}
1805
// A bitset of the punctuation characters that may follow a $ to form a global
// name. Printable ASCII is split into three 32-character words (0x20-0x3f,
// 0x40-0x5f, 0x60-0x7e); each character owns the bit given by its low five
// bits within the word it falls in.
#define PM_PUNCT_BIT(ch, word) ((((ch) >> 5) - 1 == (word)) ? (1U << ((ch) & 31)) : 0)
#define PM_PUNCT_WORD(word) ( \
    PM_PUNCT_BIT('~', word) | PM_PUNCT_BIT('*', word) | PM_PUNCT_BIT('$', word) | \
    PM_PUNCT_BIT('?', word) | PM_PUNCT_BIT('!', word) | PM_PUNCT_BIT('@', word) | \
    PM_PUNCT_BIT('/', word) | PM_PUNCT_BIT('\\', word) | PM_PUNCT_BIT(';', word) | \
    PM_PUNCT_BIT(',', word) | PM_PUNCT_BIT('.', word) | PM_PUNCT_BIT('=', word) | \
    PM_PUNCT_BIT(':', word) | PM_PUNCT_BIT('<', word) | PM_PUNCT_BIT('>', word) | \
    PM_PUNCT_BIT('\"', word) | PM_PUNCT_BIT('&', word) | PM_PUNCT_BIT('`', word) | \
    PM_PUNCT_BIT('\'', word) | PM_PUNCT_BIT('+', word) | PM_PUNCT_BIT('0', word))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PM_PUNCT_WORD(0),
    PM_PUNCT_WORD(1),
    PM_PUNCT_WORD(2)
};

#undef PM_PUNCT_BIT
#undef PM_PUNCT_WORD

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes outside 0x21-0x7e are never
 * accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    const bool in_range = (code > 0x20) && (code <= 0x7e);
    if (!in_range) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1U) != 0;
}
1830
1831static inline bool
1832token_is_setter_name(pm_token_t *token) {
1833 return (
1834 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1835 ((token->type == PM_TOKEN_IDENTIFIER) &&
1836 (token->end - token->start >= 2) &&
1837 (token->end[-1] == '='))
1838 );
1839}
1840
/**
 * Return true if the first `length` bytes of source spell one of the keywords
 * listed below. Only keywords of exactly that length are considered.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    // Keywords grouped by length. Each list is NULL-terminated.
    static const char *const length_2[] = { "do", "if", "in", "or", NULL };
    static const char *const length_3[] = { "and", "def", "end", "for", "nil", "not", NULL };
    static const char *const length_4[] = { "case", "else", "next", "redo", "self", "then", "true", "when", NULL };
    static const char *const length_5[] = {
        "alias", "begin", "break", "class", "elsif", "false", "retry",
        "super", "undef", "until", "while", "yield", NULL
    };
    static const char *const length_6[] = { "ensure", "module", "rescue", "return", "unless", NULL };
    static const char *const length_8[] = { "__LINE__", "__FILE__", NULL };
    static const char *const length_12[] = { "__ENCODING__", NULL };

    const char *const *candidates;

    switch (length) {
        case 2: candidates = length_2; break;
        case 3: candidates = length_3; break;
        case 4: candidates = length_4; break;
        case 5: candidates = length_5; break;
        case 6: candidates = length_6; break;
        case 8: candidates = length_8; break;
        case 12: candidates = length_12; break;
        default: return false;
    }

    for (; *candidates != NULL; candidates++) {
        if (memcmp(source, *candidates, length) == 0) return true;
    }

    return false;
}
1911
1912/******************************************************************************/
1913/* Node flag handling functions */
1914/******************************************************************************/
1915
1919static inline void
1920pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1921 node->flags |= flag;
1922}
1923
1927static inline void
1928pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1929 node->flags &= (pm_node_flags_t) ~flag;
1930}
1931
1935static inline void
1936pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1937 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1938 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1939 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1940 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1941 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1942 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1943 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1944 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1945
1946 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1947}
1948
1949/******************************************************************************/
1950/* Node creation functions */
1951/******************************************************************************/
1952
1958#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1959
1963static inline pm_node_flags_t
1964pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1965 pm_node_flags_t flags = 0;
1966
1967 if (closing->type == PM_TOKEN_REGEXP_END) {
1968 pm_buffer_t unknown_flags = { 0 };
1969
1970 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1971 switch (*flag) {
1972 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1973 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1974 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1975 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1976
1977 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1978 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1979 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1980 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1981
1982 default: pm_buffer_append_byte(&unknown_flags, *flag);
1983 }
1984 }
1985
1986 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1987 if (unknown_flags_length != 0) {
1988 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1989 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1990 }
1991 pm_buffer_free(&unknown_flags);
1992 }
1993
1994 return flags;
1995}
1996
1997#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1998
1999static pm_statements_node_t *
2000pm_statements_node_create(pm_parser_t *parser);
2001
2002static void
2003pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2004
2005static size_t
2006pm_statements_node_body_length(pm_statements_node_t *node);
2007
2012static inline void
2013pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2014 if (integer->values != NULL) {
2015 size_t byte_size = integer->length * sizeof(uint32_t);
2016 uint32_t *old_values = integer->values;
2017 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
2018 xfree(old_values);
2019 }
2020}
2021
2025static pm_missing_node_t *
2026pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2027 return pm_missing_node_new(
2028 parser->arena,
2029 ++parser->node_id,
2030 0,
2031 ((pm_location_t) { .start = start, .length = length })
2032 );
2033}
2034
2038static pm_alias_global_variable_node_t *
2039pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2040 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2041
2042 return pm_alias_global_variable_node_new(
2043 parser->arena,
2044 ++parser->node_id,
2045 0,
2046 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2047 new_name,
2048 old_name,
2049 TOK2LOC(parser, keyword)
2050 );
2051}
2052
2056static pm_alias_method_node_t *
2057pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2058 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2059
2060 return pm_alias_method_node_new(
2061 parser->arena,
2062 ++parser->node_id,
2063 0,
2064 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2065 new_name,
2066 old_name,
2067 TOK2LOC(parser, keyword)
2068 );
2069}
2070
2074static pm_alternation_pattern_node_t *
2075pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2076 return pm_alternation_pattern_node_new(
2077 parser->arena,
2078 ++parser->node_id,
2079 0,
2080 PM_LOCATION_INIT_NODES(left, right),
2081 left,
2082 right,
2083 TOK2LOC(parser, operator)
2084 );
2085}
2086
2090static pm_and_node_t *
2091pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2092 pm_assert_value_expression(parser, left);
2093
2094 return pm_and_node_new(
2095 parser->arena,
2096 ++parser->node_id,
2097 0,
2098 PM_LOCATION_INIT_NODES(left, right),
2099 left,
2100 right,
2101 TOK2LOC(parser, operator)
2102 );
2103}
2104
2108static pm_arguments_node_t *
2109pm_arguments_node_create(pm_parser_t *parser) {
2110 return pm_arguments_node_new(
2111 parser->arena,
2112 ++parser->node_id,
2113 0,
2114 PM_LOCATION_INIT_UNSET,
2115 ((pm_node_list_t) { 0 })
2116 );
2117}
2118
2122static size_t
2123pm_arguments_node_size(pm_arguments_node_t *node) {
2124 return node->arguments.size;
2125}
2126
2130static void
2131pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
2132 if (pm_arguments_node_size(node) == 0) {
2133 PM_NODE_START_SET_NODE(node, argument);
2134 }
2135
2136 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2137 PM_NODE_LENGTH_SET_NODE(node, argument);
2138 }
2139
2140 pm_node_list_append(arena, &node->arguments, argument);
2141
2142 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2143 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2144 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2145 } else {
2146 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2147 }
2148 }
2149}
2150
2154static pm_array_node_t *
2155pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2156 if (opening == NULL) {
2157 return pm_array_node_new(
2158 parser->arena,
2159 ++parser->node_id,
2160 PM_NODE_FLAG_STATIC_LITERAL,
2161 PM_LOCATION_INIT_UNSET,
2162 ((pm_node_list_t) { 0 }),
2163 ((pm_location_t) { 0 }),
2164 ((pm_location_t) { 0 })
2165 );
2166 } else {
2167 return pm_array_node_new(
2168 parser->arena,
2169 ++parser->node_id,
2170 PM_NODE_FLAG_STATIC_LITERAL,
2171 PM_LOCATION_INIT_TOKEN(parser, opening),
2172 ((pm_node_list_t) { 0 }),
2173 TOK2LOC(parser, opening),
2174 TOK2LOC(parser, opening)
2175 );
2176 }
2177}
2178
2182static inline void
2183pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
2184 if (!node->elements.size && !node->opening_loc.length) {
2185 PM_NODE_START_SET_NODE(node, element);
2186 }
2187
2188 pm_node_list_append(arena, &node->elements, element);
2189 PM_NODE_LENGTH_SET_NODE(node, element);
2190
2191 // If the element is not a static literal, then the array is not a static
2192 // literal. Turn that flag off.
2193 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2194 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2195 }
2196
2197 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2198 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2199 }
2200}
2201
2205static void
2206pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2207 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2208 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2209 node->closing_loc = TOK2LOC(parser, closing);
2210}
2211
2216static pm_array_pattern_node_t *
2217pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2218 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2219 parser->arena,
2220 ++parser->node_id,
2221 0,
2222 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2223 NULL,
2224 ((pm_node_list_t) { 0 }),
2225 NULL,
2226 ((pm_node_list_t) { 0 }),
2227 ((pm_location_t) { 0 }),
2228 ((pm_location_t) { 0 })
2229 );
2230
2231 // For now we're going to just copy over each pointer manually. This could be
2232 // much more efficient, as we could instead resize the node list.
2233 bool found_rest = false;
2234 pm_node_t *child;
2235
2236 PM_NODE_LIST_FOREACH(nodes, index, child) {
2237 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2238 node->rest = child;
2239 found_rest = true;
2240 } else if (found_rest) {
2241 pm_node_list_append(parser->arena, &node->posts, child);
2242 } else {
2243 pm_node_list_append(parser->arena, &node->requireds, child);
2244 }
2245 }
2246
2247 return node;
2248}
2249
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2255 return pm_array_pattern_node_new(
2256 parser->arena,
2257 ++parser->node_id,
2258 0,
2259 PM_LOCATION_INIT_NODE(rest),
2260 NULL,
2261 ((pm_node_list_t) { 0 }),
2262 rest,
2263 ((pm_node_list_t) { 0 }),
2264 ((pm_location_t) { 0 }),
2265 ((pm_location_t) { 0 })
2266 );
2267}
2268
2273static pm_array_pattern_node_t *
2274pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2275 return pm_array_pattern_node_new(
2276 parser->arena,
2277 ++parser->node_id,
2278 0,
2279 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2280 constant,
2281 ((pm_node_list_t) { 0 }),
2282 NULL,
2283 ((pm_node_list_t) { 0 }),
2284 TOK2LOC(parser, opening),
2285 TOK2LOC(parser, closing)
2286 );
2287}
2288
2293static pm_array_pattern_node_t *
2294pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2295 return pm_array_pattern_node_new(
2296 parser->arena,
2297 ++parser->node_id,
2298 0,
2299 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2300 NULL,
2301 ((pm_node_list_t) { 0 }),
2302 NULL,
2303 ((pm_node_list_t) { 0 }),
2304 TOK2LOC(parser, opening),
2305 TOK2LOC(parser, closing)
2306 );
2307}
2308
2309static inline void
2310pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2311 pm_node_list_append(arena, &node->requireds, inner);
2312}
2313
2317static pm_assoc_node_t *
2318pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2319 uint32_t end;
2320
2321 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2322 end = PM_NODE_END(value);
2323 } else if (operator != NULL) {
2324 end = PM_TOKEN_END(parser, operator);
2325 } else {
2326 end = PM_NODE_END(key);
2327 }
2328
2329 // Hash string keys will be frozen, so we can mark them as frozen here so
2330 // that the compiler picks them up and also when we check for static literal
2331 // on the keys it gets factored in.
2332 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2333 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2334 }
2335
2336 // If the key and value of this assoc node are both static literals, then
2337 // we can mark this node as a static literal.
2338 pm_node_flags_t flags = 0;
2339 if (
2340 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2341 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2342 ) {
2343 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2344 }
2345
2346 return pm_assoc_node_new(
2347 parser->arena,
2348 ++parser->node_id,
2349 flags,
2350 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2351 key,
2352 value,
2353 NTOK2LOC(parser, operator)
2354 );
2355}
2356
2360static pm_assoc_splat_node_t *
2361pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2362 assert(operator->type == PM_TOKEN_USTAR_STAR);
2363
2364 return pm_assoc_splat_node_new(
2365 parser->arena,
2366 ++parser->node_id,
2367 0,
2368 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2369 value,
2370 TOK2LOC(parser, operator)
2371 );
2372}
2373
2377static pm_back_reference_read_node_t *
2378pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2379 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2380
2381 return pm_back_reference_read_node_new(
2382 parser->arena,
2383 ++parser->node_id,
2384 0,
2385 PM_LOCATION_INIT_TOKEN(parser, name),
2386 pm_parser_constant_id_token(parser, name)
2387 );
2388}
2389
2393static pm_begin_node_t *
2394pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2395 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2396 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2397
2398 return pm_begin_node_new(
2399 parser->arena,
2400 ++parser->node_id,
2401 0,
2402 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2403 NTOK2LOC(parser, begin_keyword),
2404 statements,
2405 NULL,
2406 NULL,
2407 NULL,
2408 ((pm_location_t) { 0 })
2409 );
2410}
2411
2415static void
2416pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2417 if (node->begin_keyword_loc.length == 0) {
2418 PM_NODE_START_SET_NODE(node, rescue_clause);
2419 }
2420 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2421 node->rescue_clause = rescue_clause;
2422}
2423
2427static void
2428pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2429 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, else_clause);
2431 }
2432 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2433 node->else_clause = else_clause;
2434}
2435
2439static void
2440pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2441 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2442 PM_NODE_START_SET_NODE(node, ensure_clause);
2443 }
2444 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2445 node->ensure_clause = ensure_clause;
2446}
2447
2451static void
2452pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2453 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2454 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2455 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2456}
2457
2461static pm_block_argument_node_t *
2462pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2463 assert(operator->type == PM_TOKEN_UAMPERSAND);
2464
2465 return pm_block_argument_node_new(
2466 parser->arena,
2467 ++parser->node_id,
2468 0,
2469 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2470 expression,
2471 TOK2LOC(parser, operator)
2472 );
2473}
2474
2478static pm_block_node_t *
2479pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2480 return pm_block_node_new(
2481 parser->arena,
2482 ++parser->node_id,
2483 0,
2484 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2485 *locals,
2486 parameters,
2487 body,
2488 TOK2LOC(parser, opening),
2489 TOK2LOC(parser, closing)
2490 );
2491}
2492
2496static pm_block_parameter_node_t *
2497pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2498 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2499
2500 return pm_block_parameter_node_new(
2501 parser->arena,
2502 ++parser->node_id,
2503 0,
2504 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
2505 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2506 NTOK2LOC(parser, name),
2507 TOK2LOC(parser, operator)
2508 );
2509}
2510
2514static pm_block_parameters_node_t *
2515pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2516 uint32_t start;
2517 if (opening != NULL) {
2518 start = PM_TOKEN_START(parser, opening);
2519 } else if (parameters != NULL) {
2520 start = PM_NODE_START(parameters);
2521 } else {
2522 start = 0;
2523 }
2524
2525 uint32_t end;
2526 if (parameters != NULL) {
2527 end = PM_NODE_END(parameters);
2528 } else if (opening != NULL) {
2529 end = PM_TOKEN_END(parser, opening);
2530 } else {
2531 end = 0;
2532 }
2533
2534 return pm_block_parameters_node_new(
2535 parser->arena,
2536 ++parser->node_id,
2537 0,
2538 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2539 parameters,
2540 ((pm_node_list_t) { 0 }),
2541 NTOK2LOC(parser, opening),
2542 ((pm_location_t) { 0 })
2543 );
2544}
2545
2549static void
2550pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2551 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2552 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2553 node->closing_loc = TOK2LOC(parser, closing);
2554}
2555
2559static pm_block_local_variable_node_t *
2560pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2561 return pm_block_local_variable_node_new(
2562 parser->arena,
2563 ++parser->node_id,
2564 0,
2565 PM_LOCATION_INIT_TOKEN(parser, name),
2566 pm_parser_constant_id_token(parser, name)
2567 );
2568}
2569
2573static void
2574pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2575 pm_node_list_append(arena, &node->locals, UP(local));
2576
2577 if (PM_NODE_LENGTH(node) == 0) {
2578 PM_NODE_START_SET_NODE(node, local);
2579 }
2580
2581 PM_NODE_LENGTH_SET_NODE(node, local);
2582}
2583
2587static pm_break_node_t *
2588pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2589 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2590
2591 return pm_break_node_new(
2592 parser->arena,
2593 ++parser->node_id,
2594 0,
2595 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2596 arguments,
2597 TOK2LOC(parser, keyword)
2598 );
2599}
2600
2601// There are certain flags that we want to use internally but don't want to
2602// expose because they are not relevant beyond parsing. Therefore we'll define
2603// them here and not define them in config.yml/a header file.
2604static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2605
2606static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2607static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2608static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2609
2615static pm_call_node_t *
2616pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2617 return pm_call_node_new(
2618 parser->arena,
2619 ++parser->node_id,
2620 flags,
2621 PM_LOCATION_INIT_UNSET,
2622 NULL,
2623 ((pm_location_t) { 0 }),
2624 0,
2625 ((pm_location_t) { 0 }),
2626 ((pm_location_t) { 0 }),
2627 NULL,
2628 ((pm_location_t) { 0 }),
2629 ((pm_location_t) { 0 }),
2630 NULL
2631 );
2632}
2633
2638static inline pm_node_flags_t
2639pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2640 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2641}
2642
2647static pm_call_node_t *
2648pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2649 pm_assert_value_expression(parser, receiver);
2650
2651 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2652 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2653 flags |= PM_CALL_NODE_FLAGS_INDEX;
2654 }
2655
2656 pm_call_node_t *node = pm_call_node_create(parser, flags);
2657
2658 PM_NODE_START_SET_NODE(node, receiver);
2659
2660 const pm_location_t *end = pm_arguments_end(arguments);
2661 assert(end != NULL && "unreachable");
2662 PM_NODE_LENGTH_SET_LOCATION(node, end);
2663
2664 node->receiver = receiver;
2665 node->message_loc.start = arguments->opening_loc.start;
2666 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2667
2668 node->opening_loc = arguments->opening_loc;
2669 node->arguments = arguments->arguments;
2670 node->closing_loc = arguments->closing_loc;
2671 node->block = arguments->block;
2672
2673 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2674 return node;
2675}
2676
2680static pm_call_node_t *
2681pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2682 pm_assert_value_expression(parser, receiver);
2683 pm_assert_value_expression(parser, argument);
2684
2685 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2686
2687 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2688 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2689
2690 node->receiver = receiver;
2691 node->message_loc = TOK2LOC(parser, operator);
2692
2693 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2694 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2695 node->arguments = arguments;
2696
2697 node->name = pm_parser_constant_id_token(parser, operator);
2698 return node;
2699}
2700
2701static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2702
2706static pm_call_node_t *
2707pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2708 pm_assert_value_expression(parser, receiver);
2709
2710 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2711
2712 PM_NODE_START_SET_NODE(node, receiver);
2713 const pm_location_t *end = pm_arguments_end(arguments);
2714 if (end == NULL) {
2715 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
2716 } else {
2717 PM_NODE_LENGTH_SET_LOCATION(node, end);
2718 }
2719
2720 node->receiver = receiver;
2721 node->call_operator_loc = TOK2LOC(parser, operator);
2722 node->message_loc = TOK2LOC(parser, message);
2723 node->opening_loc = arguments->opening_loc;
2724 node->arguments = arguments->arguments;
2725 node->closing_loc = arguments->closing_loc;
2726 node->block = arguments->block;
2727
2728 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2729 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2730 }
2731
2736 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
2737 return node;
2738}
2739
2743static pm_call_node_t *
2744pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2745 pm_call_node_t *node = pm_call_node_create(parser, 0);
2746 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
2747
2748 node->receiver = receiver;
2749 node->arguments = arguments;
2750
2751 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2752 return node;
2753}
2754
2759static pm_call_node_t *
2760pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2761 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2762
2763 PM_NODE_START_SET_TOKEN(parser, node, message);
2764 const pm_location_t *end = pm_arguments_end(arguments);
2765 assert(end != NULL && "unreachable");
2766 PM_NODE_LENGTH_SET_LOCATION(node, end);
2767
2768 node->message_loc = TOK2LOC(parser, message);
2769 node->opening_loc = arguments->opening_loc;
2770 node->arguments = arguments->arguments;
2771 node->closing_loc = arguments->closing_loc;
2772 node->block = arguments->block;
2773
2774 node->name = pm_parser_constant_id_token(parser, message);
2775 return node;
2776}
2777
2782static pm_call_node_t *
2783pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2784 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2785
2786 node->base.location = (pm_location_t) { 0 };
2787 node->arguments = arguments;
2788
2789 node->name = name;
2790 return node;
2791}
2792
2796static pm_call_node_t *
2797pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2798 pm_assert_value_expression(parser, receiver);
2799 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2800
2801 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2802
2803 PM_NODE_START_SET_TOKEN(parser, node, message);
2804 if (arguments->closing_loc.length > 0) {
2805 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
2806 } else {
2807 assert(receiver != NULL);
2808 PM_NODE_LENGTH_SET_NODE(node, receiver);
2809 }
2810
2811 node->receiver = receiver;
2812 node->message_loc = TOK2LOC(parser, message);
2813 node->opening_loc = arguments->opening_loc;
2814 node->arguments = arguments->arguments;
2815 node->closing_loc = arguments->closing_loc;
2816
2817 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2818 return node;
2819}
2820
2824static pm_call_node_t *
2825pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2826 pm_assert_value_expression(parser, receiver);
2827
2828 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2829
2830 PM_NODE_START_SET_NODE(node, receiver);
2831 const pm_location_t *end = pm_arguments_end(arguments);
2832 assert(end != NULL && "unreachable");
2833 PM_NODE_LENGTH_SET_LOCATION(node, end);
2834
2835 node->receiver = receiver;
2836 node->call_operator_loc = TOK2LOC(parser, operator);
2837 node->opening_loc = arguments->opening_loc;
2838 node->arguments = arguments->arguments;
2839 node->closing_loc = arguments->closing_loc;
2840 node->block = arguments->block;
2841
2842 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2843 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2844 }
2845
2846 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2847 return node;
2848}
2849
2853static pm_call_node_t *
2854pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2855 pm_assert_value_expression(parser, receiver);
2856
2857 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2858
2859 PM_NODE_START_SET_TOKEN(parser, node, operator);
2860 PM_NODE_LENGTH_SET_NODE(node, receiver);
2861
2862 node->receiver = receiver;
2863 node->message_loc = TOK2LOC(parser, operator);
2864
2865 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2866 return node;
2867}
2868
2873static pm_call_node_t *
2874pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2875 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2876
2877 node->base.location = TOK2LOC(parser, message);
2878 node->message_loc = TOK2LOC(parser, message);
2879
2880 node->name = pm_parser_constant_id_token(parser, message);
2881 return node;
2882}
2883
2888static inline bool
2889pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2890 return (
2891 (node->message_loc.length > 0) &&
2892 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
2893 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
2894 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
2895 (node->opening_loc.length == 0) &&
2896 (node->arguments == NULL) &&
2897 (node->block == NULL)
2898 );
2899}
2900
2904static void
2905pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2906 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2907
2908 if (write_constant->length > 0) {
2909 size_t length = write_constant->length - 1;
2910
2911 void *memory = xmalloc(length);
2912 memcpy(memory, write_constant->start, length);
2913
2914 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2915 } else {
2916 // We can get here if the message was missing because of a syntax error.
2917 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2918 }
2919}
2920
2924static pm_call_and_write_node_t *
2925pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2926 assert(target->block == NULL);
2927 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2928
2929 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
2930 parser->arena,
2931 ++parser->node_id,
2932 FL(target),
2933 PM_LOCATION_INIT_NODES(target, value),
2934 target->receiver,
2935 target->call_operator_loc,
2936 target->message_loc,
2937 0,
2938 target->name,
2939 TOK2LOC(parser, operator),
2940 value
2941 );
2942
2943 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2944
2945 // The target is no longer necessary because we've reused its children.
2946 // It is arena-allocated so no explicit free is needed.
2947
2948 return node;
2949}
2950
2955static void
2956pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2957 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2958 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2959 pm_node_t *node;
2960 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2961 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2962 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2963 break;
2964 }
2965 }
2966 }
2967
2968 if (block != NULL) {
2969 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2970 }
2971 }
2972}
2973
2977static pm_index_and_write_node_t *
2978pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2979 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2980
2981 pm_index_arguments_check(parser, target->arguments, target->block);
2982
2983 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2984
2985 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
2986 parser->arena,
2987 ++parser->node_id,
2988 FL(target),
2989 PM_LOCATION_INIT_NODES(target, value),
2990 target->receiver,
2991 target->call_operator_loc,
2992 target->opening_loc,
2993 target->arguments,
2994 target->closing_loc,
2995 (pm_block_argument_node_t *) target->block,
2996 TOK2LOC(parser, operator),
2997 value
2998 );
2999
3000 // The target is no longer necessary because we've reused its children.
3001 // It is arena-allocated so no explicit free is needed.
3002
3003 return node;
3004}
3005
3009static pm_call_operator_write_node_t *
3010pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3012
3013 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3014 parser->arena,
3015 ++parser->node_id,
3016 FL(target),
3017 PM_LOCATION_INIT_NODES(target, value),
3018 target->receiver,
3019 target->call_operator_loc,
3020 target->message_loc,
3021 0,
3022 target->name,
3023 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3024 TOK2LOC(parser, operator),
3025 value
3026 );
3027
3028 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3029
3030 // The target is no longer necessary because we've reused its children.
3031 // It is arena-allocated so no explicit free is needed.
3032
3033 return node;
3034}
3035
3039static pm_index_operator_write_node_t *
3040pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 pm_index_arguments_check(parser, target->arguments, target->block);
3042
3043 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3044
3045 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3046 parser->arena,
3047 ++parser->node_id,
3048 FL(target),
3049 PM_LOCATION_INIT_NODES(target, value),
3050 target->receiver,
3051 target->call_operator_loc,
3052 target->opening_loc,
3053 target->arguments,
3054 target->closing_loc,
3055 (pm_block_argument_node_t *) target->block,
3056 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3057 TOK2LOC(parser, operator),
3058 value
3059 );
3060
3061 // The target is no longer necessary because we've reused its children.
3062 // It is arena-allocated so no explicit free is needed.
3063
3064 return node;
3065}
3066
3070static pm_call_or_write_node_t *
3071pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3072 assert(target->block == NULL);
3073 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3074
3075 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3076 parser->arena,
3077 ++parser->node_id,
3078 FL(target),
3079 PM_LOCATION_INIT_NODES(target, value),
3080 target->receiver,
3081 target->call_operator_loc,
3082 target->message_loc,
3083 0,
3084 target->name,
3085 TOK2LOC(parser, operator),
3086 value
3087 );
3088
3089 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3090
3091 // The target is no longer necessary because we've reused its children.
3092 // It is arena-allocated so no explicit free is needed.
3093
3094 return node;
3095}
3096
3100static pm_index_or_write_node_t *
3101pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3102 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3103
3104 pm_index_arguments_check(parser, target->arguments, target->block);
3105
3106 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3107
3108 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3109 parser->arena,
3110 ++parser->node_id,
3111 FL(target),
3112 PM_LOCATION_INIT_NODES(target, value),
3113 target->receiver,
3114 target->call_operator_loc,
3115 target->opening_loc,
3116 target->arguments,
3117 target->closing_loc,
3118 (pm_block_argument_node_t *) target->block,
3119 TOK2LOC(parser, operator),
3120 value
3121 );
3122
3123 // The target is no longer necessary because we've reused its children.
3124 // It is arena-allocated so no explicit free is needed.
3125
3126 return node;
3127}
3128
3133static pm_call_target_node_t *
3134pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3135 pm_call_target_node_t *node = pm_call_target_node_new(
3136 parser->arena,
3137 ++parser->node_id,
3138 FL(target),
3139 PM_LOCATION_INIT_NODE(target),
3140 target->receiver,
3141 target->call_operator_loc,
3142 target->name,
3143 target->message_loc
3144 );
3145
3146 /* It is possible to get here where we have parsed an invalid syntax tree
3147 * where the call operator was not present. In that case we will have a
3148 * problem because it is a required location. In this case we need to fill
3149 * it in with a fake location so that the syntax tree remains valid. */
3150 if (node->call_operator_loc.length == 0) {
3151 node->call_operator_loc = target->base.location;
3152 }
3153
3154 // The target is no longer necessary because we've reused its children.
3155 // It is arena-allocated so no explicit free is needed.
3156
3157 return node;
3158}
3159
3164static pm_index_target_node_t *
3165pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3166 pm_index_arguments_check(parser, target->arguments, target->block);
3167 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3168
3169 pm_index_target_node_t *node = pm_index_target_node_new(
3170 parser->arena,
3171 ++parser->node_id,
3172 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3173 PM_LOCATION_INIT_NODE(target),
3174 target->receiver,
3175 target->opening_loc,
3176 target->arguments,
3177 target->closing_loc,
3178 (pm_block_argument_node_t *) target->block
3179 );
3180
3181 // The target is no longer necessary because we've reused its children.
3182 // It is arena-allocated so no explicit free is needed.
3183
3184 return node;
3185}
3186
3190static pm_capture_pattern_node_t *
3191pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3192 return pm_capture_pattern_node_new(
3193 parser->arena,
3194 ++parser->node_id,
3195 0,
3196 PM_LOCATION_INIT_NODES(value, target),
3197 value,
3198 target,
3199 TOK2LOC(parser, operator)
3200 );
3201}
3202
3206static pm_case_node_t *
3207pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3208 return pm_case_node_new(
3209 parser->arena,
3210 ++parser->node_id,
3211 0,
3212 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3213 predicate,
3214 ((pm_node_list_t) { 0 }),
3215 NULL,
3216 TOK2LOC(parser, case_keyword),
3217 NTOK2LOC(parser, end_keyword)
3218 );
3219}
3220
3224static void
3225pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3226 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3227
3228 pm_node_list_append(arena, &node->conditions, condition);
3229 PM_NODE_LENGTH_SET_NODE(node, condition);
3230}
3231
3235static void
3236pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3237 node->else_clause = else_clause;
3238 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3239}
3240
3244static void
3245pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3246 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3247 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3248}
3249
3253static pm_case_match_node_t *
3254pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3255 return pm_case_match_node_new(
3256 parser->arena,
3257 ++parser->node_id,
3258 0,
3259 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3260 predicate,
3261 ((pm_node_list_t) { 0 }),
3262 NULL,
3263 TOK2LOC(parser, case_keyword),
3264 ((pm_location_t) { 0 })
3265 );
3266}
3267
3271static void
3272pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3273 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3274
3275 pm_node_list_append(arena, &node->conditions, condition);
3276 PM_NODE_LENGTH_SET_NODE(node, condition);
3277}
3278
3282static void
3283pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3284 node->else_clause = else_clause;
3285 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3286}
3287
3291static void
3292pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3293 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3294 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3295}
3296
3300static pm_class_node_t *
3301pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3302 return pm_class_node_new(
3303 parser->arena,
3304 ++parser->node_id,
3305 0,
3306 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3307 *locals,
3308 TOK2LOC(parser, class_keyword),
3309 constant_path,
3310 NTOK2LOC(parser, inheritance_operator),
3311 superclass,
3312 body,
3313 TOK2LOC(parser, end_keyword),
3314 pm_parser_constant_id_token(parser, name)
3315 );
3316}
3317
3321static pm_class_variable_and_write_node_t *
3322pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3323 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3324
3325 return pm_class_variable_and_write_node_new(
3326 parser->arena,
3327 ++parser->node_id,
3328 0,
3329 PM_LOCATION_INIT_NODES(target, value),
3330 target->name,
3331 target->base.location,
3332 TOK2LOC(parser, operator),
3333 value
3334 );
3335}
3336
3340static pm_class_variable_operator_write_node_t *
3341pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3342 return pm_class_variable_operator_write_node_new(
3343 parser->arena,
3344 ++parser->node_id,
3345 0,
3346 PM_LOCATION_INIT_NODES(target, value),
3347 target->name,
3348 target->base.location,
3349 TOK2LOC(parser, operator),
3350 value,
3351 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3352 );
3353}
3354
3358static pm_class_variable_or_write_node_t *
3359pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3360 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3361
3362 return pm_class_variable_or_write_node_new(
3363 parser->arena,
3364 ++parser->node_id,
3365 0,
3366 PM_LOCATION_INIT_NODES(target, value),
3367 target->name,
3368 target->base.location,
3369 TOK2LOC(parser, operator),
3370 value
3371 );
3372}
3373
3377static pm_class_variable_read_node_t *
3378pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3379 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3380
3381 return pm_class_variable_read_node_new(
3382 parser->arena,
3383 ++parser->node_id,
3384 0,
3385 PM_LOCATION_INIT_TOKEN(parser, token),
3386 pm_parser_constant_id_token(parser, token)
3387 );
3388}
3389
3396static inline pm_node_flags_t
3397pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3398 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3399 return flags;
3400 }
3401 return 0;
3402}
3403
3407static pm_class_variable_write_node_t *
3408pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3409 return pm_class_variable_write_node_new(
3410 parser->arena,
3411 ++parser->node_id,
3412 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3413 PM_LOCATION_INIT_NODES(read_node, value),
3414 read_node->name,
3415 read_node->base.location,
3416 value,
3417 TOK2LOC(parser, operator)
3418 );
3419}
3420
3424static pm_constant_path_and_write_node_t *
3425pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3426 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3427
3428 return pm_constant_path_and_write_node_new(
3429 parser->arena,
3430 ++parser->node_id,
3431 0,
3432 PM_LOCATION_INIT_NODES(target, value),
3433 target,
3434 TOK2LOC(parser, operator),
3435 value
3436 );
3437}
3438
3442static pm_constant_path_operator_write_node_t *
3443pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3444 return pm_constant_path_operator_write_node_new(
3445 parser->arena,
3446 ++parser->node_id,
3447 0,
3448 PM_LOCATION_INIT_NODES(target, value),
3449 target,
3450 TOK2LOC(parser, operator),
3451 value,
3452 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3453 );
3454}
3455
3459static pm_constant_path_or_write_node_t *
3460pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3461 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3462
3463 return pm_constant_path_or_write_node_new(
3464 parser->arena,
3465 ++parser->node_id,
3466 0,
3467 PM_LOCATION_INIT_NODES(target, value),
3468 target,
3469 TOK2LOC(parser, operator),
3470 value
3471 );
3472}
3473
3477static pm_constant_path_node_t *
3478pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3479 pm_assert_value_expression(parser, parent);
3480
3481 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3482 if (name_token->type == PM_TOKEN_CONSTANT) {
3483 name = pm_parser_constant_id_token(parser, name_token);
3484 }
3485
3486 return pm_constant_path_node_new(
3487 parser->arena,
3488 ++parser->node_id,
3489 0,
3490 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3491 parent,
3492 name,
3493 TOK2LOC(parser, delimiter),
3494 TOK2LOC(parser, name_token)
3495 );
3496}
3497
3501static pm_constant_path_write_node_t *
3502pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3503 return pm_constant_path_write_node_new(
3504 parser->arena,
3505 ++parser->node_id,
3506 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3507 PM_LOCATION_INIT_NODES(target, value),
3508 target,
3509 TOK2LOC(parser, operator),
3510 value
3511 );
3512}
3513
3517static pm_constant_and_write_node_t *
3518pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3519 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3520
3521 return pm_constant_and_write_node_new(
3522 parser->arena,
3523 ++parser->node_id,
3524 0,
3525 PM_LOCATION_INIT_NODES(target, value),
3526 target->name,
3527 target->base.location,
3528 TOK2LOC(parser, operator),
3529 value
3530 );
3531}
3532
3536static pm_constant_operator_write_node_t *
3537pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3538 return pm_constant_operator_write_node_new(
3539 parser->arena,
3540 ++parser->node_id,
3541 0,
3542 PM_LOCATION_INIT_NODES(target, value),
3543 target->name,
3544 target->base.location,
3545 TOK2LOC(parser, operator),
3546 value,
3547 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3548 );
3549}
3550
3554static pm_constant_or_write_node_t *
3555pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3556 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3557
3558 return pm_constant_or_write_node_new(
3559 parser->arena,
3560 ++parser->node_id,
3561 0,
3562 PM_LOCATION_INIT_NODES(target, value),
3563 target->name,
3564 target->base.location,
3565 TOK2LOC(parser, operator),
3566 value
3567 );
3568}
3569
3573static pm_constant_read_node_t *
3574pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3575 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3576
3577 return pm_constant_read_node_new(
3578 parser->arena,
3579 ++parser->node_id,
3580 0,
3581 PM_LOCATION_INIT_TOKEN(parser, name),
3582 pm_parser_constant_id_token(parser, name)
3583 );
3584}
3585
3589static pm_constant_write_node_t *
3590pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3591 return pm_constant_write_node_new(
3592 parser->arena,
3593 ++parser->node_id,
3594 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3595 PM_LOCATION_INIT_NODES(target, value),
3596 target->name,
3597 target->base.location,
3598 value,
3599 TOK2LOC(parser, operator)
3600 );
3601}
3602
3606static void
3607pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3608 switch (PM_NODE_TYPE(node)) {
3609 case PM_BEGIN_NODE: {
3610 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3611 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3612 break;
3613 }
3614 case PM_PARENTHESES_NODE: {
3615 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3616 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3617 break;
3618 }
3619 case PM_STATEMENTS_NODE: {
3620 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3621 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3622 break;
3623 }
3624 case PM_ARRAY_NODE:
3625 case PM_FLOAT_NODE:
3626 case PM_IMAGINARY_NODE:
3627 case PM_INTEGER_NODE:
3628 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3629 case PM_INTERPOLATED_STRING_NODE:
3630 case PM_INTERPOLATED_SYMBOL_NODE:
3631 case PM_INTERPOLATED_X_STRING_NODE:
3632 case PM_RATIONAL_NODE:
3633 case PM_REGULAR_EXPRESSION_NODE:
3634 case PM_SOURCE_ENCODING_NODE:
3635 case PM_SOURCE_FILE_NODE:
3636 case PM_SOURCE_LINE_NODE:
3637 case PM_STRING_NODE:
3638 case PM_SYMBOL_NODE:
3639 case PM_X_STRING_NODE:
3640 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3641 break;
3642 default:
3643 break;
3644 }
3645}
3646
3650static pm_def_node_t *
3651pm_def_node_create(
3652 pm_parser_t *parser,
3653 pm_constant_id_t name,
3654 const pm_token_t *name_loc,
3655 pm_node_t *receiver,
3656 pm_parameters_node_t *parameters,
3657 pm_node_t *body,
3658 pm_constant_id_list_t *locals,
3659 const pm_token_t *def_keyword,
3660 const pm_token_t *operator,
3661 const pm_token_t *lparen,
3662 const pm_token_t *rparen,
3663 const pm_token_t *equal,
3664 const pm_token_t *end_keyword
3665) {
3666 if (receiver != NULL) {
3667 pm_def_node_receiver_check(parser, receiver);
3668 }
3669
3670 return pm_def_node_new(
3671 parser->arena,
3672 ++parser->node_id,
3673 0,
3674 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3675 name,
3676 TOK2LOC(parser, name_loc),
3677 receiver,
3678 parameters,
3679 body,
3680 *locals,
3681 TOK2LOC(parser, def_keyword),
3682 NTOK2LOC(parser, operator),
3683 NTOK2LOC(parser, lparen),
3684 NTOK2LOC(parser, rparen),
3685 NTOK2LOC(parser, equal),
3686 NTOK2LOC(parser, end_keyword)
3687 );
3688}
3689
3693static pm_defined_node_t *
3694pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3695 return pm_defined_node_new(
3696 parser->arena,
3697 ++parser->node_id,
3698 0,
3699 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3700 NTOK2LOC(parser, lparen),
3701 value,
3702 NTOK2LOC(parser, rparen),
3703 TOK2LOC(parser, keyword)
3704 );
3705}
3706
3710static pm_else_node_t *
3711pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3712 return pm_else_node_new(
3713 parser->arena,
3714 ++parser->node_id,
3715 0,
3716 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
3717 TOK2LOC(parser, else_keyword),
3718 statements,
3719 NTOK2LOC(parser, end_keyword)
3720 );
3721}
3722
3726static pm_embedded_statements_node_t *
3727pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3728 return pm_embedded_statements_node_new(
3729 parser->arena,
3730 ++parser->node_id,
3731 0,
3732 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
3733 TOK2LOC(parser, opening),
3734 statements,
3735 TOK2LOC(parser, closing)
3736 );
3737}
3738
3742static pm_embedded_variable_node_t *
3743pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3744 return pm_embedded_variable_node_new(
3745 parser->arena,
3746 ++parser->node_id,
3747 0,
3748 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
3749 TOK2LOC(parser, operator),
3750 variable
3751 );
3752}
3753
3757static pm_ensure_node_t *
3758pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3759 return pm_ensure_node_new(
3760 parser->arena,
3761 ++parser->node_id,
3762 0,
3763 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
3764 TOK2LOC(parser, ensure_keyword),
3765 statements,
3766 TOK2LOC(parser, end_keyword)
3767 );
3768}
3769
3773static pm_false_node_t *
3774pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3775 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3776
3777 return pm_false_node_new(
3778 parser->arena,
3779 ++parser->node_id,
3780 PM_NODE_FLAG_STATIC_LITERAL,
3781 PM_LOCATION_INIT_TOKEN(parser, token)
3782 );
3783}
3784
3789static pm_find_pattern_node_t *
3790pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3791 pm_node_t *left = nodes->nodes[0];
3792 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3793 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3794
3795 pm_node_t *right;
3796
3797 if (nodes->size == 1) {
3798 right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0));
3799 } else {
3800 right = nodes->nodes[nodes->size - 1];
3801 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3802 }
3803
3804#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3805 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3806 // The resulting AST will anyway be ignored, but this file still needs to compile.
3807 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3808#else
3809 pm_node_t *right_splat_node = right;
3810#endif
3811
3812 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
3813 parser->arena,
3814 ++parser->node_id,
3815 0,
3816 PM_LOCATION_INIT_NODES(left, right),
3817 NULL,
3818 left_splat_node,
3819 ((pm_node_list_t) { 0 }),
3820 right_splat_node,
3821 ((pm_location_t) { 0 }),
3822 ((pm_location_t) { 0 })
3823 );
3824
3825 // For now we're going to just copy over each pointer manually. This could be
3826 // much more efficient, as we could instead resize the node list to only point
3827 // to 1...-1.
3828 for (size_t index = 1; index < nodes->size - 1; index++) {
3829 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
3830 }
3831
3832 return node;
3833}
3834
3839static double
3840pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3841 ptrdiff_t diff = token->end - token->start;
3842 if (diff <= 0) return 0.0;
3843
3844 // First, get a buffer of the content.
3845 size_t length = (size_t) diff;
3846 const size_t buffer_size = sizeof(char) * (length + 1);
3847 char *buffer = xmalloc(buffer_size);
3848 memcpy((void *) buffer, token->start, length);
3849
3850 // Next, determine if we need to replace the decimal point because of
3851 // locale-specific options, and then normalize them if we have to.
3852 char decimal_point = *localeconv()->decimal_point;
3853 if (decimal_point != '.') {
3854 for (size_t index = 0; index < length; index++) {
3855 if (buffer[index] == '.') buffer[index] = decimal_point;
3856 }
3857 }
3858
3859 // Next, handle underscores by removing them from the buffer.
3860 for (size_t index = 0; index < length; index++) {
3861 if (buffer[index] == '_') {
3862 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3863 length--;
3864 }
3865 }
3866
3867 // Null-terminate the buffer so that strtod cannot read off the end.
3868 buffer[length] = '\0';
3869
3870 // Now, call strtod to parse the value. Note that CRuby has their own
3871 // version of strtod which avoids locales. We're okay using the locale-aware
3872 // version because we've already validated through the parser that the token
3873 // is in a valid format.
3874 errno = 0;
3875 char *eptr;
3876 double value = strtod(buffer, &eptr);
3877
3878 // This should never happen, because we've already checked that the token
3879 // is in a valid format. However it's good to be safe.
3880 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3881 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
3882 xfree_sized(buffer, buffer_size);
3883 return 0.0;
3884 }
3885
3886 // If errno is set, then it should only be ERANGE. At this point we need to
3887 // check if it's infinity (it should be).
3888 if (errno == ERANGE && PRISM_ISINF(value)) {
3889 int warn_width;
3890 const char *ellipsis;
3891
3892 if (length > 20) {
3893 warn_width = 20;
3894 ellipsis = "...";
3895 } else {
3896 warn_width = (int) length;
3897 ellipsis = "";
3898 }
3899
3900 pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3901 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3902 }
3903
3904 // Finally we can free the buffer and return the value.
3905 xfree_sized(buffer, buffer_size);
3906 return value;
3907}
3908
3912static pm_float_node_t *
3913pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3914 assert(token->type == PM_TOKEN_FLOAT);
3915
3916 return pm_float_node_new(
3917 parser->arena,
3918 ++parser->node_id,
3919 PM_NODE_FLAG_STATIC_LITERAL,
3920 PM_LOCATION_INIT_TOKEN(parser, token),
3921 pm_double_parse(parser, token)
3922 );
3923}
3924
3928static pm_imaginary_node_t *
3929pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3930 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3931
3932 return pm_imaginary_node_new(
3933 parser->arena,
3934 ++parser->node_id,
3935 PM_NODE_FLAG_STATIC_LITERAL,
3936 PM_LOCATION_INIT_TOKEN(parser, token),
3937 UP(pm_float_node_create(parser, &((pm_token_t) {
3938 .type = PM_TOKEN_FLOAT,
3939 .start = token->start,
3940 .end = token->end - 1
3941 })))
3942 );
3943}
3944
3948static pm_rational_node_t *
3949pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3950 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3951
3952 pm_rational_node_t *node = pm_rational_node_new(
3953 parser->arena,
3954 ++parser->node_id,
3955 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
3956 PM_LOCATION_INIT_TOKEN(parser, token),
3957 ((pm_integer_t) { 0 }),
3958 ((pm_integer_t) { 0 })
3959 );
3960
3961 const uint8_t *start = token->start;
3962 const uint8_t *end = token->end - 1; // r
3963
3964 while (start < end && *start == '0') start++; // 0.1 -> .1
3965 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3966
3967 size_t length = (size_t) (end - start);
3968 if (length == 1) {
3969 node->denominator.value = 1;
3970 return node;
3971 }
3972
3973 const uint8_t *point = memchr(start, '.', length);
3974 assert(point && "should have a decimal point");
3975
3976 uint8_t *digits = xmalloc(length);
3977 if (digits == NULL) {
3978 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3979 abort();
3980 }
3981
3982 memcpy(digits, start, (unsigned long) (point - start));
3983 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3984 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3985
3986 size_t fract_length = 0;
3987 for (const uint8_t *fract = point; fract < end; ++fract) {
3988 if (*fract != '_') ++fract_length;
3989 }
3990 digits[0] = '1';
3991 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
3992 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
3993 xfree_sized(digits, length);
3994
3995 pm_integers_reduce(&node->numerator, &node->denominator);
3996 pm_integer_arena_move(parser->arena, &node->numerator);
3997 pm_integer_arena_move(parser->arena, &node->denominator);
3998 return node;
3999}
4000
4005static pm_imaginary_node_t *
4006pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4007 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4008
4009 return pm_imaginary_node_new(
4010 parser->arena,
4011 ++parser->node_id,
4012 PM_NODE_FLAG_STATIC_LITERAL,
4013 PM_LOCATION_INIT_TOKEN(parser, token),
4014 UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4015 .type = PM_TOKEN_FLOAT_RATIONAL,
4016 .start = token->start,
4017 .end = token->end - 1
4018 })))
4019 );
4020}
4021
4025static pm_for_node_t *
4026pm_for_node_create(
4027 pm_parser_t *parser,
4028 pm_node_t *index,
4029 pm_node_t *collection,
4030 pm_statements_node_t *statements,
4031 const pm_token_t *for_keyword,
4032 const pm_token_t *in_keyword,
4033 const pm_token_t *do_keyword,
4034 const pm_token_t *end_keyword
4035) {
4036 return pm_for_node_new(
4037 parser->arena,
4038 ++parser->node_id,
4039 0,
4040 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4041 index,
4042 collection,
4043 statements,
4044 TOK2LOC(parser, for_keyword),
4045 TOK2LOC(parser, in_keyword),
4046 NTOK2LOC(parser, do_keyword),
4047 TOK2LOC(parser, end_keyword)
4048 );
4049}
4050
4054static pm_forwarding_arguments_node_t *
4055pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4056 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4057
4058 return pm_forwarding_arguments_node_new(
4059 parser->arena,
4060 ++parser->node_id,
4061 0,
4062 PM_LOCATION_INIT_TOKEN(parser, token)
4063 );
4064}
4065
4069static pm_forwarding_parameter_node_t *
4070pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4071 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4072
4073 return pm_forwarding_parameter_node_new(
4074 parser->arena,
4075 ++parser->node_id,
4076 0,
4077 PM_LOCATION_INIT_TOKEN(parser, token)
4078 );
4079}
4080
4084static pm_forwarding_super_node_t *
4085pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4086 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4087 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4088
4089 pm_block_node_t *block = NULL;
4090 if (arguments->block != NULL) {
4091 block = (pm_block_node_t *) arguments->block;
4092 }
4093
4094 return pm_forwarding_super_node_new(
4095 parser->arena,
4096 ++parser->node_id,
4097 0,
4098 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
4099 block
4100 );
4101}
4102
4107static pm_hash_pattern_node_t *
4108pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4109 return pm_hash_pattern_node_new(
4110 parser->arena,
4111 ++parser->node_id,
4112 0,
4113 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4114 NULL,
4115 ((pm_node_list_t) { 0 }),
4116 NULL,
4117 TOK2LOC(parser, opening),
4118 TOK2LOC(parser, closing)
4119 );
4120}
4121
4125static pm_hash_pattern_node_t *
4126pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4127 uint32_t start;
4128 uint32_t end;
4129
4130 if (elements->size > 0) {
4131 if (rest) {
4132 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4133 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4134 } else {
4135 start = PM_NODE_START(elements->nodes[0]);
4136 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4137 }
4138 } else {
4139 assert(rest != NULL);
4140 start = PM_NODE_START(rest);
4141 end = PM_NODE_END(rest);
4142 }
4143
4144 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4145 parser->arena,
4146 ++parser->node_id,
4147 0,
4148 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4149 NULL,
4150 ((pm_node_list_t) { 0 }),
4151 rest,
4152 ((pm_location_t) { 0 }),
4153 ((pm_location_t) { 0 })
4154 );
4155
4156 pm_node_list_concat(parser->arena, &node->elements, elements);
4157 return node;
4158}
4159
4163static pm_constant_id_t
4164pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4165 switch (PM_NODE_TYPE(target)) {
4166 case PM_GLOBAL_VARIABLE_READ_NODE:
4167 return ((pm_global_variable_read_node_t *) target)->name;
4168 case PM_BACK_REFERENCE_READ_NODE:
4169 return ((pm_back_reference_read_node_t *) target)->name;
4170 case PM_NUMBERED_REFERENCE_READ_NODE:
4171 // This will only ever happen in the event of a syntax error, but we
4172 // still need to provide something for the node.
4173 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4174 default:
4175 assert(false && "unreachable");
4176 return (pm_constant_id_t) -1;
4177 }
4178}
4179
4183static pm_global_variable_and_write_node_t *
4184pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4185 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4186
4187 return pm_global_variable_and_write_node_new(
4188 parser->arena,
4189 ++parser->node_id,
4190 0,
4191 PM_LOCATION_INIT_NODES(target, value),
4192 pm_global_variable_write_name(parser, target),
4193 target->location,
4194 TOK2LOC(parser, operator),
4195 value
4196 );
4197}
4198
4202static pm_global_variable_operator_write_node_t *
4203pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4204 return pm_global_variable_operator_write_node_new(
4205 parser->arena,
4206 ++parser->node_id,
4207 0,
4208 PM_LOCATION_INIT_NODES(target, value),
4209 pm_global_variable_write_name(parser, target),
4210 target->location,
4211 TOK2LOC(parser, operator),
4212 value,
4213 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4214 );
4215}
4216
4220static pm_global_variable_or_write_node_t *
4221pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4222 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4223
4224 return pm_global_variable_or_write_node_new(
4225 parser->arena,
4226 ++parser->node_id,
4227 0,
4228 PM_LOCATION_INIT_NODES(target, value),
4229 pm_global_variable_write_name(parser, target),
4230 target->location,
4231 TOK2LOC(parser, operator),
4232 value
4233 );
4234}
4235
4239static pm_global_variable_read_node_t *
4240pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4241 return pm_global_variable_read_node_new(
4242 parser->arena,
4243 ++parser->node_id,
4244 0,
4245 PM_LOCATION_INIT_TOKEN(parser, name),
4246 pm_parser_constant_id_token(parser, name)
4247 );
4248}
4249
4253static pm_global_variable_read_node_t *
4254pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4255 return pm_global_variable_read_node_new(
4256 parser->arena,
4257 ++parser->node_id,
4258 0,
4259 PM_LOCATION_INIT_UNSET,
4260 name
4261 );
4262}
4263
4267static pm_global_variable_write_node_t *
4268pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4269 return pm_global_variable_write_node_new(
4270 parser->arena,
4271 ++parser->node_id,
4272 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4273 PM_LOCATION_INIT_NODES(target, value),
4274 pm_global_variable_write_name(parser, target),
4275 target->location,
4276 value,
4277 TOK2LOC(parser, operator)
4278 );
4279}
4280
4284static pm_global_variable_write_node_t *
4285pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4286 return pm_global_variable_write_node_new(
4287 parser->arena,
4288 ++parser->node_id,
4289 0,
4290 PM_LOCATION_INIT_UNSET,
4291 name,
4292 ((pm_location_t) { 0 }),
4293 value,
4294 ((pm_location_t) { 0 })
4295 );
4296}
4297
4301static pm_hash_node_t *
4302pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4303 assert(opening != NULL);
4304
4305 return pm_hash_node_new(
4306 parser->arena,
4307 ++parser->node_id,
4308 PM_NODE_FLAG_STATIC_LITERAL,
4309 PM_LOCATION_INIT_TOKEN(parser, opening),
4310 TOK2LOC(parser, opening),
4311 ((pm_node_list_t) { 0 }),
4312 ((pm_location_t) { 0 })
4313 );
4314}
4315
4319static inline void
4320pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4321 pm_node_list_append(arena, &hash->elements, element);
4322
4323 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4324 if (static_literal) {
4325 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4326 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4327 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4328 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4329 }
4330
4331 if (!static_literal) {
4332 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4333 }
4334}
4335
4336static inline void
4337pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4338 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4339 hash->closing_loc = TOK2LOC(parser, token);
4340}
4341
4345static pm_if_node_t *
4346pm_if_node_create(pm_parser_t *parser,
4347 const pm_token_t *if_keyword,
4348 pm_node_t *predicate,
4349 const pm_token_t *then_keyword,
4350 pm_statements_node_t *statements,
4351 pm_node_t *subsequent,
4352 const pm_token_t *end_keyword
4353) {
4354 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4355
4356 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4357 uint32_t end;
4358
4359 if (end_keyword != NULL) {
4360 end = PM_TOKEN_END(parser, end_keyword);
4361 } else if (subsequent != NULL) {
4362 end = PM_NODE_END(subsequent);
4363 } else if (pm_statements_node_body_length(statements) != 0) {
4364 end = PM_NODE_END(statements);
4365 } else {
4366 end = PM_NODE_END(predicate);
4367 }
4368
4369 return pm_if_node_new(
4370 parser->arena,
4371 ++parser->node_id,
4372 PM_NODE_FLAG_NEWLINE,
4373 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4374 TOK2LOC(parser, if_keyword),
4375 predicate,
4376 NTOK2LOC(parser, then_keyword),
4377 statements,
4378 subsequent,
4379 NTOK2LOC(parser, end_keyword)
4380 );
4381}
4382
4386static pm_if_node_t *
4387pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4388 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4389
4390 pm_statements_node_t *statements = pm_statements_node_create(parser);
4391 pm_statements_node_body_append(parser, statements, statement, true);
4392
4393 return pm_if_node_new(
4394 parser->arena,
4395 ++parser->node_id,
4396 PM_NODE_FLAG_NEWLINE,
4397 PM_LOCATION_INIT_NODES(statement, predicate),
4398 TOK2LOC(parser, if_keyword),
4399 predicate,
4400 ((pm_location_t) { 0 }),
4401 statements,
4402 NULL,
4403 ((pm_location_t) { 0 })
4404 );
4405}
4406
4410static pm_if_node_t *
4411pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4412 pm_assert_value_expression(parser, predicate);
4413 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4414
4415 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4416 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4417
4418 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4419 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4420
4421 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4422 return pm_if_node_new(
4423 parser->arena,
4424 ++parser->node_id,
4425 PM_NODE_FLAG_NEWLINE,
4426 PM_LOCATION_INIT_NODES(predicate, false_expression),
4427 ((pm_location_t) { 0 }),
4428 predicate,
4429 TOK2LOC(parser, qmark),
4430 if_statements,
4431 UP(else_node),
4432 ((pm_location_t) { 0 })
4433 );
4434}
4435
4436static inline void
4437pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4438 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4439 node->end_keyword_loc = TOK2LOC(parser, keyword);
4440}
4441
4442static inline void
4443pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4444 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4445 node->end_keyword_loc = TOK2LOC(parser, keyword);
4446}
4447
4451static pm_implicit_node_t *
4452pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4453 return pm_implicit_node_new(
4454 parser->arena,
4455 ++parser->node_id,
4456 0,
4457 PM_LOCATION_INIT_NODE(value),
4458 value
4459 );
4460}
4461
4465static pm_implicit_rest_node_t *
4466pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4467 assert(token->type == PM_TOKEN_COMMA);
4468
4469 return pm_implicit_rest_node_new(
4470 parser->arena,
4471 ++parser->node_id,
4472 0,
4473 PM_LOCATION_INIT_TOKEN(parser, token)
4474 );
4475}
4476
4480static pm_integer_node_t *
4481pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4482 assert(token->type == PM_TOKEN_INTEGER);
4483
4484 pm_integer_node_t *node = pm_integer_node_new(
4485 parser->arena,
4486 ++parser->node_id,
4487 base | PM_NODE_FLAG_STATIC_LITERAL,
4488 PM_LOCATION_INIT_TOKEN(parser, token),
4489 ((pm_integer_t) { 0 })
4490 );
4491
4492 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4493 switch (base) {
4494 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4495 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4496 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4497 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4498 default: assert(false && "unreachable"); break;
4499 }
4500
4501 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4502 pm_integer_arena_move(parser->arena, &node->value);
4503 return node;
4504}
4505
4510static pm_imaginary_node_t *
4511pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4512 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4513
4514 return pm_imaginary_node_new(
4515 parser->arena,
4516 ++parser->node_id,
4517 PM_NODE_FLAG_STATIC_LITERAL,
4518 PM_LOCATION_INIT_TOKEN(parser, token),
4519 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4520 .type = PM_TOKEN_INTEGER,
4521 .start = token->start,
4522 .end = token->end - 1
4523 })))
4524 );
4525}
4526
4531static pm_rational_node_t *
4532pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4533 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4534
4535 pm_rational_node_t *node = pm_rational_node_new(
4536 parser->arena,
4537 ++parser->node_id,
4538 base | PM_NODE_FLAG_STATIC_LITERAL,
4539 PM_LOCATION_INIT_TOKEN(parser, token),
4540 ((pm_integer_t) { 0 }),
4541 ((pm_integer_t) { .value = 1 })
4542 );
4543
4544 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4545 switch (base) {
4546 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4547 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4548 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4549 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4550 default: assert(false && "unreachable"); break;
4551 }
4552
4553 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4554 pm_integer_arena_move(parser->arena, &node->numerator);
4555
4556 return node;
4557}
4558
4563static pm_imaginary_node_t *
4564pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4565 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4566
4567 return pm_imaginary_node_new(
4568 parser->arena,
4569 ++parser->node_id,
4570 PM_NODE_FLAG_STATIC_LITERAL,
4571 PM_LOCATION_INIT_TOKEN(parser, token),
4572 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4573 .type = PM_TOKEN_INTEGER_RATIONAL,
4574 .start = token->start,
4575 .end = token->end - 1
4576 })))
4577 );
4578}
4579
4583static pm_in_node_t *
4584pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4585 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4586 uint32_t end;
4587
4588 if (statements != NULL) {
4589 end = PM_NODE_END(statements);
4590 } else if (then_keyword != NULL) {
4591 end = PM_TOKEN_END(parser, then_keyword);
4592 } else {
4593 end = PM_NODE_END(pattern);
4594 }
4595
4596 return pm_in_node_new(
4597 parser->arena,
4598 ++parser->node_id,
4599 0,
4600 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4601 pattern,
4602 statements,
4603 TOK2LOC(parser, in_keyword),
4604 NTOK2LOC(parser, then_keyword)
4605 );
4606}
4607
4611static pm_instance_variable_and_write_node_t *
4612pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4613 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4614
4615 return pm_instance_variable_and_write_node_new(
4616 parser->arena,
4617 ++parser->node_id,
4618 0,
4619 PM_LOCATION_INIT_NODES(target, value),
4620 target->name,
4621 target->base.location,
4622 TOK2LOC(parser, operator),
4623 value
4624 );
4625}
4626
4630static pm_instance_variable_operator_write_node_t *
4631pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4632 return pm_instance_variable_operator_write_node_new(
4633 parser->arena,
4634 ++parser->node_id,
4635 0,
4636 PM_LOCATION_INIT_NODES(target, value),
4637 target->name,
4638 target->base.location,
4639 TOK2LOC(parser, operator),
4640 value,
4641 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4642 );
4643}
4644
4648static pm_instance_variable_or_write_node_t *
4649pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4650 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4651
4652 return pm_instance_variable_or_write_node_new(
4653 parser->arena,
4654 ++parser->node_id,
4655 0,
4656 PM_LOCATION_INIT_NODES(target, value),
4657 target->name,
4658 target->base.location,
4659 TOK2LOC(parser, operator),
4660 value
4661 );
4662}
4663
4667static pm_instance_variable_read_node_t *
4668pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4669 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4670
4671 return pm_instance_variable_read_node_new(
4672 parser->arena,
4673 ++parser->node_id,
4674 0,
4675 PM_LOCATION_INIT_TOKEN(parser, token),
4676 pm_parser_constant_id_token(parser, token)
4677 );
4678}
4679
4684static pm_instance_variable_write_node_t *
4685pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4686 return pm_instance_variable_write_node_new(
4687 parser->arena,
4688 ++parser->node_id,
4689 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4690 PM_LOCATION_INIT_NODES(read_node, value),
4691 read_node->name,
4692 read_node->base.location,
4693 value,
4694 TOK2LOC(parser, operator)
4695 );
4696}
4697
4703static void
4704pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4705 switch (PM_NODE_TYPE(part)) {
4706 case PM_STRING_NODE:
4707 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4708 break;
4709 case PM_EMBEDDED_STATEMENTS_NODE: {
4710 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4711 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4712
4713 if (embedded == NULL) {
4714 // If there are no statements or more than one statement, then
4715 // we lose the static literal flag.
4716 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4717 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4718 // If the embedded statement is a string, then we can keep the
4719 // static literal flag and mark the string as frozen.
4720 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4721 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4722 // If the embedded statement is an interpolated string and it's
4723 // a static literal, then we can keep the static literal flag.
4724 } else {
4725 // Otherwise we lose the static literal flag.
4726 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4727 }
4728
4729 break;
4730 }
4731 case PM_EMBEDDED_VARIABLE_NODE:
4732 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4733 break;
4734 default:
4735 assert(false && "unexpected node type");
4736 break;
4737 }
4738
4739 pm_node_list_append(arena, parts, part);
4740}
4741
4745static pm_interpolated_regular_expression_node_t *
4746pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4747 return pm_interpolated_regular_expression_node_new(
4748 parser->arena,
4749 ++parser->node_id,
4750 PM_NODE_FLAG_STATIC_LITERAL,
4751 PM_LOCATION_INIT_TOKEN(parser, opening),
4752 TOK2LOC(parser, opening),
4753 ((pm_node_list_t) { 0 }),
4754 TOK2LOC(parser, opening)
4755 );
4756}
4757
4758static inline void
4759pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4760 if (PM_NODE_START(node) > PM_NODE_START(part)) {
4761 PM_NODE_START_SET_NODE(node, part);
4762 }
4763 if (PM_NODE_END(node) < PM_NODE_END(part)) {
4764 PM_NODE_LENGTH_SET_NODE(node, part);
4765 }
4766
4767 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
4768}
4769
4770static inline void
4771pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4772 node->closing_loc = TOK2LOC(parser, closing);
4773 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4774 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4775}
4776
4800static inline void
4801pm_interpolated_string_node_append(pm_arena_t *arena, pm_interpolated_string_node_t *node, pm_node_t *part) {
4802#define CLEAR_FLAGS(node) \
4803 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4804
4805#define MUTABLE_FLAGS(node) \
4806 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4807
4808 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4809 PM_NODE_START_SET_NODE(node, part);
4810 }
4811
4812 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4813 PM_NODE_LENGTH_SET_NODE(node, part);
4814 }
4815
4816 switch (PM_NODE_TYPE(part)) {
4817 case PM_STRING_NODE:
4818 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4819 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4820 // as long as this interpolation only consists of other string literals.
4821 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4822 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4823 }
4824 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4825 break;
4826 case PM_INTERPOLATED_STRING_NODE:
4827 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4828 // If the string that we're concatenating is a static literal,
4829 // then we can keep the static literal flag for this string.
4830 } else {
4831 // Otherwise, we lose the static literal flag here and we should
4832 // also clear the mutability flags.
4833 CLEAR_FLAGS(node);
4834 }
4835 break;
4836 case PM_EMBEDDED_STATEMENTS_NODE: {
4837 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4838 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4839
4840 if (embedded == NULL) {
4841 // If we're embedding multiple statements or no statements, then
4842 // the string is not longer a static literal.
4843 CLEAR_FLAGS(node);
4844 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4845 // If the embedded statement is a string, then we can make that
4846 // string as frozen and static literal, and not touch the static
4847 // literal status of this string.
4848 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4849
4850 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4851 MUTABLE_FLAGS(node);
4852 }
4853 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4854 // If the embedded statement is an interpolated string, but that
4855 // string is marked as static literal, then we can keep our
4856 // static literal status for this string.
4857 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4858 MUTABLE_FLAGS(node);
4859 }
4860 } else {
4861 // In all other cases, we lose the static literal flag here and
4862 // become mutable.
4863 CLEAR_FLAGS(node);
4864 }
4865
4866 break;
4867 }
4868 case PM_EMBEDDED_VARIABLE_NODE:
4869 // Embedded variables clear static literal, which means we also
4870 // should clear the mutability flags.
4871 CLEAR_FLAGS(node);
4872 break;
4873 case PM_X_STRING_NODE:
4874 case PM_INTERPOLATED_X_STRING_NODE:
4875 case PM_SYMBOL_NODE:
4876 case PM_INTERPOLATED_SYMBOL_NODE:
4877 // These will only happen in error cases. But we want to handle it
4878 // here so that we don't fail the assertion.
4879 CLEAR_FLAGS(node);
4880 break;
4881 default:
4882 assert(false && "unexpected node type");
4883 break;
4884 }
4885
4886 pm_node_list_append(arena, &node->parts, part);
4887
4888#undef CLEAR_FLAGS
4889#undef MUTABLE_FLAGS
4890}
4891
4895static pm_interpolated_string_node_t *
4896pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4897 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4898
4899 switch (parser->frozen_string_literal) {
4900 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4901 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4902 break;
4903 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4904 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4905 break;
4906 }
4907
4908 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4909 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4910
4911 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
4912 parser->arena,
4913 ++parser->node_id,
4914 flags,
4915 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4916 NTOK2LOC(parser, opening),
4917 ((pm_node_list_t) { 0 }),
4918 NTOK2LOC(parser, closing)
4919 );
4920
4921 if (parts != NULL) {
4922 pm_node_t *part;
4923 PM_NODE_LIST_FOREACH(parts, index, part) {
4924 pm_interpolated_string_node_append(parser->arena, node, part);
4925 }
4926 }
4927
4928 return node;
4929}
4930
4934static void
4935pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4936 node->closing_loc = TOK2LOC(parser, closing);
4937 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4938}
4939
4940static void
4941pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4942 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4943 PM_NODE_START_SET_NODE(node, part);
4944 }
4945
4946 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
4947
4948 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4949 PM_NODE_LENGTH_SET_NODE(node, part);
4950 }
4951}
4952
4953static void
4954pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4955 node->closing_loc = TOK2LOC(parser, closing);
4956 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4957}
4958
4962static pm_interpolated_symbol_node_t *
4963pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4964 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4965 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4966
4967 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
4968 parser->arena,
4969 ++parser->node_id,
4970 PM_NODE_FLAG_STATIC_LITERAL,
4971 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4972 NTOK2LOC(parser, opening),
4973 ((pm_node_list_t) { 0 }),
4974 NTOK2LOC(parser, closing)
4975 );
4976
4977 if (parts != NULL) {
4978 pm_node_t *part;
4979 PM_NODE_LIST_FOREACH(parts, index, part) {
4980 pm_interpolated_symbol_node_append(parser->arena, node, part);
4981 }
4982 }
4983
4984 return node;
4985}
4986
4990static pm_interpolated_x_string_node_t *
4991pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4992 return pm_interpolated_x_string_node_new(
4993 parser->arena,
4994 ++parser->node_id,
4995 0,
4996 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4997 TOK2LOC(parser, opening),
4998 ((pm_node_list_t) { 0 }),
4999 TOK2LOC(parser, closing)
5000 );
5001}
5002
5003static inline void
5004pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5005 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5006 PM_NODE_LENGTH_SET_NODE(node, part);
5007}
5008
5009static inline void
5010pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5011 node->closing_loc = TOK2LOC(parser, closing);
5012 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5013}
5014
5018static pm_it_local_variable_read_node_t *
5019pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5020 return pm_it_local_variable_read_node_new(
5021 parser->arena,
5022 ++parser->node_id,
5023 0,
5024 PM_LOCATION_INIT_TOKEN(parser, name)
5025 );
5026}
5027
5031static pm_it_parameters_node_t *
5032pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5033 return pm_it_parameters_node_new(
5034 parser->arena,
5035 ++parser->node_id,
5036 0,
5037 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
5038 );
5039}
5040
5044static pm_keyword_hash_node_t *
5045pm_keyword_hash_node_create(pm_parser_t *parser) {
5046 return pm_keyword_hash_node_new(
5047 parser->arena,
5048 ++parser->node_id,
5049 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5050 PM_LOCATION_INIT_UNSET,
5051 ((pm_node_list_t) { 0 })
5052 );
5053}
5054
5058static void
5059pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5060 // If the element being added is not an AssocNode or does not have a symbol
5061 // key, then we want to turn the SYMBOL_KEYS flag off.
5062 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5063 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5064 }
5065
5066 pm_node_list_append(arena, &hash->elements, element);
5067 if (PM_NODE_LENGTH(hash) == 0) {
5068 PM_NODE_START_SET_NODE(hash, element);
5069 }
5070 PM_NODE_LENGTH_SET_NODE(hash, element);
5071}
5072
5076static pm_required_keyword_parameter_node_t *
5077pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5078 return pm_required_keyword_parameter_node_new(
5079 parser->arena,
5080 ++parser->node_id,
5081 0,
5082 PM_LOCATION_INIT_TOKEN(parser, name),
5083 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5084 TOK2LOC(parser, name)
5085 );
5086}
5087
5091static pm_optional_keyword_parameter_node_t *
5092pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5093 return pm_optional_keyword_parameter_node_new(
5094 parser->arena,
5095 ++parser->node_id,
5096 0,
5097 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5098 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5099 TOK2LOC(parser, name),
5100 value
5101 );
5102}
5103
5107static pm_keyword_rest_parameter_node_t *
5108pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5109 return pm_keyword_rest_parameter_node_new(
5110 parser->arena,
5111 ++parser->node_id,
5112 0,
5113 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5114 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5115 NTOK2LOC(parser, name),
5116 TOK2LOC(parser, operator)
5117 );
5118}
5119
5123static pm_lambda_node_t *
5124pm_lambda_node_create(
5125 pm_parser_t *parser,
5126 pm_constant_id_list_t *locals,
5127 const pm_token_t *operator,
5128 const pm_token_t *opening,
5129 const pm_token_t *closing,
5130 pm_node_t *parameters,
5131 pm_node_t *body
5132) {
5133 return pm_lambda_node_new(
5134 parser->arena,
5135 ++parser->node_id,
5136 0,
5137 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5138 *locals,
5139 TOK2LOC(parser, operator),
5140 TOK2LOC(parser, opening),
5141 TOK2LOC(parser, closing),
5142 parameters,
5143 body
5144 );
5145}
5146
5150static pm_local_variable_and_write_node_t *
5151pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5152 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5153 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5154
5155 return pm_local_variable_and_write_node_new(
5156 parser->arena,
5157 ++parser->node_id,
5158 0,
5159 PM_LOCATION_INIT_NODES(target, value),
5160 target->location,
5161 TOK2LOC(parser, operator),
5162 value,
5163 name,
5164 depth
5165 );
5166}
5167
5171static pm_local_variable_operator_write_node_t *
5172pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5173 return pm_local_variable_operator_write_node_new(
5174 parser->arena,
5175 ++parser->node_id,
5176 0,
5177 PM_LOCATION_INIT_NODES(target, value),
5178 target->location,
5179 TOK2LOC(parser, operator),
5180 value,
5181 name,
5182 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5183 depth
5184 );
5185}
5186
5190static pm_local_variable_or_write_node_t *
5191pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5192 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5193 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5194
5195 return pm_local_variable_or_write_node_new(
5196 parser->arena,
5197 ++parser->node_id,
5198 0,
5199 PM_LOCATION_INIT_NODES(target, value),
5200 target->location,
5201 TOK2LOC(parser, operator),
5202 value,
5203 name,
5204 depth
5205 );
5206}
5207
5211static pm_local_variable_read_node_t *
5212pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5213 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5214
5215 return pm_local_variable_read_node_new(
5216 parser->arena,
5217 ++parser->node_id,
5218 0,
5219 PM_LOCATION_INIT_TOKEN(parser, name),
5220 name_id,
5221 depth
5222 );
5223}
5224
5228static pm_local_variable_read_node_t *
5229pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5230 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5231 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5232}
5233
5238static pm_local_variable_read_node_t *
5239pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5240 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5241 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5242}
5243
5247static pm_local_variable_write_node_t *
5248pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5249 return pm_local_variable_write_node_new(
5250 parser->arena,
5251 ++parser->node_id,
5252 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5253 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5254 name,
5255 depth,
5256 *name_loc,
5257 value,
5258 TOK2LOC(parser, operator)
5259 );
5260}
5261
/**
 * Returns true if the byte range [start, end) is exactly the two bytes `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so the bytes are only read when both exist.
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5269
5274static inline bool
5275pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5276 return (
5277 (length == 2) &&
5278 (parser->start[start] == '_') &&
5279 (parser->start[start + 1] != '0') &&
5280 pm_char_is_decimal_digit(parser->start[start + 1])
5281 );
5282}
5283
5288static inline void
5289pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5290 if (pm_token_is_numbered_parameter(parser, start, length)) {
5291 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5292 }
5293}
5294
5299static pm_local_variable_target_node_t *
5300pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5301 pm_refute_numbered_parameter(parser, location->start, location->length);
5302
5303 return pm_local_variable_target_node_new(
5304 parser->arena,
5305 ++parser->node_id,
5306 0,
5307 ((pm_location_t) { .start = location->start, .length = location->length }),
5308 name,
5309 depth
5310 );
5311}
5312
5316static pm_match_predicate_node_t *
5317pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5318 pm_assert_value_expression(parser, value);
5319
5320 return pm_match_predicate_node_new(
5321 parser->arena,
5322 ++parser->node_id,
5323 0,
5324 PM_LOCATION_INIT_NODES(value, pattern),
5325 value,
5326 pattern,
5327 TOK2LOC(parser, operator)
5328 );
5329}
5330
5334static pm_match_required_node_t *
5335pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5336 pm_assert_value_expression(parser, value);
5337
5338 return pm_match_required_node_new(
5339 parser->arena,
5340 ++parser->node_id,
5341 0,
5342 PM_LOCATION_INIT_NODES(value, pattern),
5343 value,
5344 pattern,
5345 TOK2LOC(parser, operator)
5346 );
5347}
5348
5352static pm_match_write_node_t *
5353pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5354 return pm_match_write_node_new(
5355 parser->arena,
5356 ++parser->node_id,
5357 0,
5358 PM_LOCATION_INIT_NODE(call),
5359 call,
5360 ((pm_node_list_t) { 0 })
5361 );
5362}
5363
5367static pm_module_node_t *
5368pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5369 return pm_module_node_new(
5370 parser->arena,
5371 ++parser->node_id,
5372 0,
5373 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5374 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5375 TOK2LOC(parser, module_keyword),
5376 constant_path,
5377 body,
5378 TOK2LOC(parser, end_keyword),
5379 pm_parser_constant_id_token(parser, name)
5380 );
5381}
5382
5386static pm_multi_target_node_t *
5387pm_multi_target_node_create(pm_parser_t *parser) {
5388 return pm_multi_target_node_new(
5389 parser->arena,
5390 ++parser->node_id,
5391 0,
5392 PM_LOCATION_INIT_UNSET,
5393 ((pm_node_list_t) { 0 }),
5394 NULL,
5395 ((pm_node_list_t) { 0 }),
5396 ((pm_location_t) { 0 }),
5397 ((pm_location_t) { 0 })
5398 );
5399}
5400
5404static void
5405pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5406 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5407 if (node->rest == NULL) {
5408 node->rest = target;
5409 } else {
5410 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5411 pm_node_list_append(parser->arena, &node->rights, target);
5412 }
5413 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5414 if (node->rest == NULL) {
5415 node->rest = target;
5416 } else {
5417 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5418 pm_node_list_append(parser->arena, &node->rights, target);
5419 }
5420 } else if (node->rest == NULL) {
5421 pm_node_list_append(parser->arena, &node->lefts, target);
5422 } else {
5423 pm_node_list_append(parser->arena, &node->rights, target);
5424 }
5425
5426 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5427 PM_NODE_START_SET_NODE(node, target);
5428 }
5429
5430 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5431 PM_NODE_LENGTH_SET_NODE(node, target);
5432 }
5433}
5434
5438static void
5439pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5440 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5441 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5442 node->lparen_loc = TOK2LOC(parser, lparen);
5443}
5444
5448static void
5449pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5450 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5451 node->rparen_loc = TOK2LOC(parser, rparen);
5452}
5453
5457static pm_multi_write_node_t *
5458pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5459 /* The target is no longer necessary because we have reused its children. It
5460 * is arena-allocated so no explicit free is needed. */
5461 return pm_multi_write_node_new(
5462 parser->arena,
5463 ++parser->node_id,
5464 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5465 PM_LOCATION_INIT_NODES(target, value),
5466 target->lefts,
5467 target->rest,
5468 target->rights,
5469 target->lparen_loc,
5470 target->rparen_loc,
5471 TOK2LOC(parser, operator),
5472 value
5473 );
5474}
5475
5479static pm_next_node_t *
5480pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5481 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5482
5483 return pm_next_node_new(
5484 parser->arena,
5485 ++parser->node_id,
5486 0,
5487 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5488 arguments,
5489 TOK2LOC(parser, keyword)
5490 );
5491}
5492
5496static pm_nil_node_t *
5497pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5498 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5499
5500 return pm_nil_node_new(
5501 parser->arena,
5502 ++parser->node_id,
5503 PM_NODE_FLAG_STATIC_LITERAL,
5504 PM_LOCATION_INIT_TOKEN(parser, token)
5505 );
5506}
5507
5511static pm_no_block_parameter_node_t *
5512pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5513 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5514 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5515
5516 return pm_no_block_parameter_node_new(
5517 parser->arena,
5518 ++parser->node_id,
5519 0,
5520 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5521 TOK2LOC(parser, operator),
5522 TOK2LOC(parser, keyword)
5523 );
5524}
5525
5529static pm_no_keywords_parameter_node_t *
5530pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5531 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5532 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5533
5534 return pm_no_keywords_parameter_node_new(
5535 parser->arena,
5536 ++parser->node_id,
5537 0,
5538 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5539 TOK2LOC(parser, operator),
5540 TOK2LOC(parser, keyword)
5541 );
5542}
5543
5547static pm_numbered_parameters_node_t *
5548pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5549 return pm_numbered_parameters_node_new(
5550 parser->arena,
5551 ++parser->node_id,
5552 0,
5553 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5554 maximum
5555 );
5556}
5557
5562#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5563
5570static uint32_t
5571pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5572 const uint8_t *start = token->start + 1;
5573 const uint8_t *end = token->end;
5574
5575 ptrdiff_t diff = end - start;
5576 assert(diff > 0);
5577#if PTRDIFF_MAX > SIZE_MAX
5578 assert(diff < (ptrdiff_t) SIZE_MAX);
5579#endif
5580 size_t length = (size_t) diff;
5581
5582 char *digits = xcalloc(length + 1, sizeof(char));
5583 memcpy(digits, start, length);
5584 digits[length] = '\0';
5585
5586 char *endptr;
5587 errno = 0;
5588 unsigned long value = strtoul(digits, &endptr, 10);
5589
5590 if ((digits == endptr) || (*endptr != '\0')) {
5591 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5592 value = 0;
5593 }
5594
5595 xfree_sized(digits, sizeof(char) * (length + 1));
5596
5597 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5598 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5599 value = 0;
5600 }
5601
5602 return (uint32_t) value;
5603}
5604
5605#undef NTH_REF_MAX
5606
5610static pm_numbered_reference_read_node_t *
5611pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5612 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5613
5614 return pm_numbered_reference_read_node_new(
5615 parser->arena,
5616 ++parser->node_id,
5617 0,
5618 PM_LOCATION_INIT_TOKEN(parser, name),
5619 pm_numbered_reference_read_node_number(parser, name)
5620 );
5621}
5622
5626static pm_optional_parameter_node_t *
5627pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5628 return pm_optional_parameter_node_new(
5629 parser->arena,
5630 ++parser->node_id,
5631 0,
5632 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5633 pm_parser_constant_id_token(parser, name),
5634 TOK2LOC(parser, name),
5635 TOK2LOC(parser, operator),
5636 value
5637 );
5638}
5639
5643static pm_or_node_t *
5644pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5645 pm_assert_value_expression(parser, left);
5646
5647 return pm_or_node_new(
5648 parser->arena,
5649 ++parser->node_id,
5650 0,
5651 PM_LOCATION_INIT_NODES(left, right),
5652 left,
5653 right,
5654 TOK2LOC(parser, operator)
5655 );
5656}
5657
5661static pm_parameters_node_t *
5662pm_parameters_node_create(pm_parser_t *parser) {
5663 return pm_parameters_node_new(
5664 parser->arena,
5665 ++parser->node_id,
5666 0,
5667 PM_LOCATION_INIT_UNSET,
5668 ((pm_node_list_t) { 0 }),
5669 ((pm_node_list_t) { 0 }),
5670 NULL,
5671 ((pm_node_list_t) { 0 }),
5672 ((pm_node_list_t) { 0 }),
5673 NULL,
5674 NULL
5675 );
5676}
5677
5681static void
5682pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5683 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5684 PM_NODE_START_SET_NODE(params, param);
5685 }
5686
5687 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5688 PM_NODE_LENGTH_SET_NODE(params, param);
5689 }
5690}
5691
5695static void
5696pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5697 pm_parameters_node_location_set(params, param);
5698 pm_node_list_append(arena, &params->requireds, param);
5699}
5700
5704static void
5705pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5706 pm_parameters_node_location_set(params, UP(param));
5707 pm_node_list_append(arena, &params->optionals, UP(param));
5708}
5709
5713static void
5714pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5715 pm_parameters_node_location_set(params, param);
5716 pm_node_list_append(arena, &params->posts, param);
5717}
5718
5722static void
5723pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5724 pm_parameters_node_location_set(params, param);
5725 params->rest = param;
5726}
5727
5731static void
5732pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5733 pm_parameters_node_location_set(params, param);
5734 pm_node_list_append(arena, &params->keywords, param);
5735}
5736
5740static void
5741pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5742 assert(params->keyword_rest == NULL);
5743 pm_parameters_node_location_set(params, param);
5744 params->keyword_rest = param;
5745}
5746
5750static void
5751pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
5752 assert(params->block == NULL);
5753 pm_parameters_node_location_set(params, param);
5754 params->block = param;
5755}
5756
5760static pm_program_node_t *
5761pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5762 return pm_program_node_new(
5763 parser->arena,
5764 ++parser->node_id,
5765 0,
5766 PM_LOCATION_INIT_NODE(statements),
5767 *locals,
5768 statements
5769 );
5770}
5771
5775static pm_parentheses_node_t *
5776pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5777 return pm_parentheses_node_new(
5778 parser->arena,
5779 ++parser->node_id,
5780 flags,
5781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5782 body,
5783 TOK2LOC(parser, opening),
5784 TOK2LOC(parser, closing)
5785 );
5786}
5787
5791static pm_pinned_expression_node_t *
5792pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5793 return pm_pinned_expression_node_new(
5794 parser->arena,
5795 ++parser->node_id,
5796 0,
5797 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
5798 expression,
5799 TOK2LOC(parser, operator),
5800 TOK2LOC(parser, lparen),
5801 TOK2LOC(parser, rparen)
5802 );
5803}
5804
5808static pm_pinned_variable_node_t *
5809pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5810 return pm_pinned_variable_node_new(
5811 parser->arena,
5812 ++parser->node_id,
5813 0,
5814 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
5815 variable,
5816 TOK2LOC(parser, operator)
5817 );
5818}
5819
5823static pm_post_execution_node_t *
5824pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5825 return pm_post_execution_node_new(
5826 parser->arena,
5827 ++parser->node_id,
5828 0,
5829 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5830 statements,
5831 TOK2LOC(parser, keyword),
5832 TOK2LOC(parser, opening),
5833 TOK2LOC(parser, closing)
5834 );
5835}
5836
5840static pm_pre_execution_node_t *
5841pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5842 return pm_pre_execution_node_new(
5843 parser->arena,
5844 ++parser->node_id,
5845 0,
5846 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5847 statements,
5848 TOK2LOC(parser, keyword),
5849 TOK2LOC(parser, opening),
5850 TOK2LOC(parser, closing)
5851 );
5852}
5853
5857static pm_range_node_t *
5858pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5859 pm_assert_value_expression(parser, left);
5860 pm_assert_value_expression(parser, right);
5861 pm_node_flags_t flags = 0;
5862
5863 // Indicate that this node is an exclusive range if the operator is `...`.
5864 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5865 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5866 }
5867
5868 // Indicate that this node is a static literal (i.e., can be compiled with
5869 // a putobject in CRuby) if the left and right are implicit nil, explicit
5870 // nil, or integers.
5871 if (
5872 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5873 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5874 ) {
5875 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5876 }
5877
5878 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
5879 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
5880
5881 return pm_range_node_new(
5882 parser->arena,
5883 ++parser->node_id,
5884 flags,
5885 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5886 left,
5887 right,
5888 TOK2LOC(parser, operator)
5889 );
5890}
5891
5895static pm_redo_node_t *
5896pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5897 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5898
5899 return pm_redo_node_new(
5900 parser->arena,
5901 ++parser->node_id,
5902 0,
5903 PM_LOCATION_INIT_TOKEN(parser, token)
5904 );
5905}
5906
5911static pm_regular_expression_node_t *
5912pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5913 return pm_regular_expression_node_new(
5914 parser->arena,
5915 ++parser->node_id,
5916 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
5917 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5918 TOK2LOC(parser, opening),
5919 TOK2LOC(parser, content),
5920 TOK2LOC(parser, closing),
5921 *unescaped
5922 );
5923}
5924
5928static inline pm_regular_expression_node_t *
5929pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5930 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5931}
5932
5936static pm_required_parameter_node_t *
5937pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5938 return pm_required_parameter_node_new(
5939 parser->arena,
5940 ++parser->node_id,
5941 0,
5942 PM_LOCATION_INIT_TOKEN(parser, token),
5943 pm_parser_constant_id_token(parser, token)
5944 );
5945}
5946
5950static pm_rescue_modifier_node_t *
5951pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5952 return pm_rescue_modifier_node_new(
5953 parser->arena,
5954 ++parser->node_id,
5955 0,
5956 PM_LOCATION_INIT_NODES(expression, rescue_expression),
5957 expression,
5958 TOK2LOC(parser, keyword),
5959 rescue_expression
5960 );
5961}
5962
5966static pm_rescue_node_t *
5967pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5968 return pm_rescue_node_new(
5969 parser->arena,
5970 ++parser->node_id,
5971 0,
5972 PM_LOCATION_INIT_TOKEN(parser, keyword),
5973 TOK2LOC(parser, keyword),
5974 ((pm_node_list_t) { 0 }),
5975 ((pm_location_t) { 0 }),
5976 NULL,
5977 ((pm_location_t) { 0 }),
5978 NULL,
5979 NULL
5980 );
5981}
5982
5983static inline void
5984pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
5985 node->operator_loc = TOK2LOC(parser, operator);
5986}
5987
5991static void
5992pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5993 node->reference = reference;
5994 PM_NODE_LENGTH_SET_NODE(node, reference);
5995}
5996
6000static void
6001pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6002 node->statements = statements;
6003 if (pm_statements_node_body_length(statements) > 0) {
6004 PM_NODE_LENGTH_SET_NODE(node, statements);
6005 }
6006}
6007
6011static void
6012pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6013 node->subsequent = subsequent;
6014 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6015}
6016
6020static void
6021pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6022 pm_node_list_append(arena, &node->exceptions, exception);
6023 PM_NODE_LENGTH_SET_NODE(node, exception);
6024}
6025
6029static pm_rest_parameter_node_t *
6030pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6031 return pm_rest_parameter_node_new(
6032 parser->arena,
6033 ++parser->node_id,
6034 0,
6035 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6036 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6037 NTOK2LOC(parser, name),
6038 TOK2LOC(parser, operator)
6039 );
6040}
6041
6045static pm_retry_node_t *
6046pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6047 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6048
6049 return pm_retry_node_new(
6050 parser->arena,
6051 ++parser->node_id,
6052 0,
6053 PM_LOCATION_INIT_TOKEN(parser, token)
6054 );
6055}
6056
6060static pm_return_node_t *
6061pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6062 return pm_return_node_new(
6063 parser->arena,
6064 ++parser->node_id,
6065 0,
6066 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6067 TOK2LOC(parser, keyword),
6068 arguments
6069 );
6070}
6071
6075static pm_self_node_t *
6076pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6077 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6078
6079 return pm_self_node_new(
6080 parser->arena,
6081 ++parser->node_id,
6082 0,
6083 PM_LOCATION_INIT_TOKEN(parser, token)
6084 );
6085}
6086
6090static pm_shareable_constant_node_t *
6091pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6092 return pm_shareable_constant_node_new(
6093 parser->arena,
6094 ++parser->node_id,
6095 (pm_node_flags_t) value,
6096 PM_LOCATION_INIT_NODE(write),
6097 write
6098 );
6099}
6100
6104static pm_singleton_class_node_t *
6105pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6106 return pm_singleton_class_node_new(
6107 parser->arena,
6108 ++parser->node_id,
6109 0,
6110 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6111 *locals,
6112 TOK2LOC(parser, class_keyword),
6113 TOK2LOC(parser, operator),
6114 expression,
6115 body,
6116 TOK2LOC(parser, end_keyword)
6117 );
6118}
6119
6123static pm_source_encoding_node_t *
6124pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6125 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6126
6127 return pm_source_encoding_node_new(
6128 parser->arena,
6129 ++parser->node_id,
6130 PM_NODE_FLAG_STATIC_LITERAL,
6131 PM_LOCATION_INIT_TOKEN(parser, token)
6132 );
6133}
6134
6138static pm_source_file_node_t*
6139pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6140 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6141
6142 pm_node_flags_t flags = 0;
6143
6144 switch (parser->frozen_string_literal) {
6145 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6146 flags |= PM_STRING_FLAGS_MUTABLE;
6147 break;
6148 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6149 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6150 break;
6151 }
6152
6153 return pm_source_file_node_new(
6154 parser->arena,
6155 ++parser->node_id,
6156 flags,
6157 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
6158 parser->filepath
6159 );
6160}
6161
6165static pm_source_line_node_t *
6166pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6167 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6168
6169 return pm_source_line_node_new(
6170 parser->arena,
6171 ++parser->node_id,
6172 PM_NODE_FLAG_STATIC_LITERAL,
6173 PM_LOCATION_INIT_TOKEN(parser, token)
6174 );
6175}
6176
6180static pm_splat_node_t *
6181pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6182 return pm_splat_node_new(
6183 parser->arena,
6184 ++parser->node_id,
6185 0,
6186 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6187 TOK2LOC(parser, operator),
6188 expression
6189 );
6190}
6191
6195static pm_statements_node_t *
6196pm_statements_node_create(pm_parser_t *parser) {
6197 return pm_statements_node_new(
6198 parser->arena,
6199 ++parser->node_id,
6200 0,
6201 PM_LOCATION_INIT_UNSET,
6202 ((pm_node_list_t) { 0 })
6203 );
6204}
6205
6209static size_t
6210pm_statements_node_body_length(pm_statements_node_t *node) {
6211 return node && node->body.size;
6212}
6213
6218static inline void
6219pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6220 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6221 PM_NODE_START_SET_NODE(node, statement);
6222 }
6223
6224 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6225 PM_NODE_LENGTH_SET_NODE(node, statement);
6226 }
6227}
6228
6232static void
6233pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6234 pm_statements_node_body_update(node, statement);
6235
6236 if (node->body.size > 0) {
6237 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6238
6239 switch (PM_NODE_TYPE(previous)) {
6240 case PM_BREAK_NODE:
6241 case PM_NEXT_NODE:
6242 case PM_REDO_NODE:
6243 case PM_RETRY_NODE:
6244 case PM_RETURN_NODE:
6245 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6246 break;
6247 default:
6248 break;
6249 }
6250 }
6251
6252 pm_node_list_append(parser->arena, &node->body, statement);
6253 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6254}
6255
6259static void
6260pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6261 pm_statements_node_body_update(node, statement);
6262 pm_node_list_prepend(arena, &node->body, statement);
6263 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6264}
6265
6269static inline pm_string_node_t *
6270pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6271 pm_node_flags_t flags = 0;
6272
6273 switch (parser->frozen_string_literal) {
6274 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6275 flags = PM_STRING_FLAGS_MUTABLE;
6276 break;
6277 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6278 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6279 break;
6280 }
6281
6282 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6283 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6284
6285 return pm_string_node_new(
6286 parser->arena,
6287 ++parser->node_id,
6288 flags,
6289 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6290 NTOK2LOC(parser, opening),
6291 TOK2LOC(parser, content),
6292 NTOK2LOC(parser, closing),
6293 *string
6294 );
6295}
6296
6300static pm_string_node_t *
6301pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6302 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6303}
6304
6309static pm_string_node_t *
6310pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6311 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6312 parser->current_string = PM_STRING_EMPTY;
6313 return node;
6314}
6315
6319static pm_super_node_t *
6320pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6321 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6322
6323 const pm_location_t *end = pm_arguments_end(arguments);
6324 assert(end != NULL && "unreachable");
6325
6326 return pm_super_node_new(
6327 parser->arena,
6328 ++parser->node_id,
6329 0,
6330 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6331 TOK2LOC(parser, keyword),
6332 arguments->opening_loc,
6333 arguments->arguments,
6334 arguments->closing_loc,
6335 arguments->block
6336 );
6337}
6338
6343static bool
6344pm_ascii_only_p(const pm_string_t *contents) {
6345 const size_t length = pm_string_length(contents);
6346 const uint8_t *source = pm_string_source(contents);
6347
6348 for (size_t index = 0; index < length; index++) {
6349 if (source[index] & 0x80) return false;
6350 }
6351
6352 return true;
6353}
6354
6358static void
6359parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6360 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6361 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6362
6363 if (width == 0) {
6364 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6365 break;
6366 }
6367
6368 cursor += width;
6369 }
6370}
6371
6376static void
6377parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6378 const pm_encoding_t *encoding = parser->encoding;
6379
6380 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6381 size_t width = encoding->char_width(cursor, end - cursor);
6382
6383 if (width == 0) {
6384 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6385 break;
6386 }
6387
6388 cursor += width;
6389 }
6390}
6391
6401static inline pm_node_flags_t
6402parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6403 if (parser->explicit_encoding != NULL) {
6404 // A Symbol may optionally have its encoding explicitly set. This will
6405 // happen if an escape sequence results in a non-ASCII code point.
6406 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6407 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6408 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6409 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6410 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6411 } else if (validate) {
6412 parse_symbol_encoding_validate_other(parser, location, contents);
6413 }
6414 } else if (pm_ascii_only_p(contents)) {
6415 // Ruby stipulates that all source files must use an ASCII-compatible
6416 // encoding. Thus, all symbols appearing in source are eligible for
6417 // "downgrading" to US-ASCII.
6418 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6419 } else if (validate) {
6420 parse_symbol_encoding_validate_other(parser, location, contents);
6421 }
6422
6423 return 0;
6424}
6425
6430static pm_symbol_node_t *
6431pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6432 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6433 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6434
6435 return pm_symbol_node_new(
6436 parser->arena,
6437 ++parser->node_id,
6438 PM_NODE_FLAG_STATIC_LITERAL | flags,
6439 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6440 NTOK2LOC(parser, opening),
6441 NTOK2LOC(parser, value),
6442 NTOK2LOC(parser, closing),
6443 *unescaped
6444 );
6445}
6446
6450static inline pm_symbol_node_t *
6451pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6452 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6453}
6454
6458static pm_symbol_node_t *
6459pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6460 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6461 parser->current_string = PM_STRING_EMPTY;
6462 return node;
6463}
6464
6468static pm_symbol_node_t *
6469pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6470 assert(token->type == PM_TOKEN_LABEL);
6471
6472 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6473 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6474 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6475
6476 assert((label.end - label.start) >= 0);
6477 pm_string_shared_init(&node->unescaped, label.start, label.end);
6478 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6479
6480 return node;
6481}
6482
6486static pm_symbol_node_t *
6487pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6488 pm_symbol_node_t *node = pm_symbol_node_new(
6489 parser->arena,
6490 ++parser->node_id,
6491 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6492 PM_LOCATION_INIT_UNSET,
6493 ((pm_location_t) { 0 }),
6494 ((pm_location_t) { 0 }),
6495 ((pm_location_t) { 0 }),
6496 ((pm_string_t) { 0 })
6497 );
6498
6499 pm_string_constant_init(&node->unescaped, content, strlen(content));
6500 return node;
6501}
6502
6506static bool
6507pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6508 const pm_location_t *location = NULL;
6509
6510 switch (PM_NODE_TYPE(node)) {
6511 case PM_SYMBOL_NODE: {
6512 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6513 if (cast->closing_loc.length > 0) {
6514 location = &cast->closing_loc;
6515 }
6516 break;
6517 }
6518 case PM_INTERPOLATED_SYMBOL_NODE: {
6519 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6520 if (cast->closing_loc.length > 0) {
6521 location = &cast->closing_loc;
6522 }
6523 break;
6524 }
6525 default:
6526 return false;
6527 }
6528
6529 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6530}
6531
6535static pm_symbol_node_t *
6536pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6537 pm_symbol_node_t *new_node = pm_symbol_node_new(
6538 parser->arena,
6539 ++parser->node_id,
6540 PM_NODE_FLAG_STATIC_LITERAL,
6541 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6542 TOK2LOC(parser, opening),
6543 node->content_loc,
6544 TOK2LOC(parser, closing),
6545 node->unescaped
6546 );
6547
6548 pm_token_t content = {
6549 .type = PM_TOKEN_IDENTIFIER,
6550 .start = parser->start + node->content_loc.start,
6551 .end = parser->start + node->content_loc.start + node->content_loc.length
6552 };
6553
6554 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6555
6556 /* The old node is arena-allocated so no explicit free is needed. */
6557 return new_node;
6558}
6559
6563static pm_string_node_t *
6564pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6565 pm_node_flags_t flags = 0;
6566
6567 switch (parser->frozen_string_literal) {
6568 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6569 flags = PM_STRING_FLAGS_MUTABLE;
6570 break;
6571 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6572 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6573 break;
6574 }
6575
6576 pm_string_node_t *new_node = pm_string_node_new(
6577 parser->arena,
6578 ++parser->node_id,
6579 flags,
6580 PM_LOCATION_INIT_NODE(node),
6581 node->opening_loc,
6582 node->value_loc,
6583 node->closing_loc,
6584 node->unescaped
6585 );
6586
6587 /* The old node is arena-allocated so no explicit free is needed. */
6588 return new_node;
6589}
6590
6594static pm_true_node_t *
6595pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6596 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6597
6598 return pm_true_node_new(
6599 parser->arena,
6600 ++parser->node_id,
6601 PM_NODE_FLAG_STATIC_LITERAL,
6602 PM_LOCATION_INIT_TOKEN(parser, token)
6603 );
6604}
6605
6609static pm_true_node_t *
6610pm_true_node_synthesized_create(pm_parser_t *parser) {
6611 return pm_true_node_new(
6612 parser->arena,
6613 ++parser->node_id,
6614 PM_NODE_FLAG_STATIC_LITERAL,
6615 PM_LOCATION_INIT_UNSET
6616 );
6617}
6618
6622static pm_undef_node_t *
6623pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6624 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6625
6626 return pm_undef_node_new(
6627 parser->arena,
6628 ++parser->node_id,
6629 0,
6630 PM_LOCATION_INIT_TOKEN(parser, token),
6631 ((pm_node_list_t) { 0 }),
6632 TOK2LOC(parser, token)
6633 );
6634}
6635
6639static void
6640pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6641 PM_NODE_LENGTH_SET_NODE(node, name);
6642 pm_node_list_append(arena, &node->names, name);
6643}
6644
6648static pm_unless_node_t *
6649pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6650 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6651 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6652
6653 return pm_unless_node_new(
6654 parser->arena,
6655 ++parser->node_id,
6656 PM_NODE_FLAG_NEWLINE,
6657 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6658 TOK2LOC(parser, keyword),
6659 predicate,
6660 NTOK2LOC(parser, then_keyword),
6661 statements,
6662 NULL,
6663 ((pm_location_t) { 0 })
6664 );
6665}
6666
6670static pm_unless_node_t *
6671pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6672 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6673
6674 pm_statements_node_t *statements = pm_statements_node_create(parser);
6675 pm_statements_node_body_append(parser, statements, statement, true);
6676
6677 return pm_unless_node_new(
6678 parser->arena,
6679 ++parser->node_id,
6680 PM_NODE_FLAG_NEWLINE,
6681 PM_LOCATION_INIT_NODES(statement, predicate),
6682 TOK2LOC(parser, unless_keyword),
6683 predicate,
6684 ((pm_location_t) { 0 }),
6685 statements,
6686 NULL,
6687 ((pm_location_t) { 0 })
6688 );
6689}
6690
6691static inline void
6692pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6693 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6694 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6695}
6696
6702static void
6703pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6704 assert(parser->current_block_exits != NULL);
6705
6706 // All of the block exits that we want to remove should be within the
6707 // statements, and since we are modifying the statements, we shouldn't have
6708 // to check the end location.
6709 uint32_t start = statements->base.location.start;
6710
6711 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6712 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6713 if (block_exit->location.start < start) break;
6714
6715 // Implicitly remove from the list by lowering the size.
6716 parser->current_block_exits->size--;
6717 }
6718}
6719
6723static pm_until_node_t *
6724pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6725 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6726
6727 return pm_until_node_new(
6728 parser->arena,
6729 ++parser->node_id,
6730 flags,
6731 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6732 TOK2LOC(parser, keyword),
6733 NTOK2LOC(parser, do_keyword),
6734 TOK2LOC(parser, closing),
6735 predicate,
6736 statements
6737 );
6738}
6739
6743static pm_until_node_t *
6744pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6745 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6746 pm_loop_modifier_block_exits(parser, statements);
6747
6748 return pm_until_node_new(
6749 parser->arena,
6750 ++parser->node_id,
6751 flags,
6752 PM_LOCATION_INIT_NODES(statements, predicate),
6753 TOK2LOC(parser, keyword),
6754 ((pm_location_t) { 0 }),
6755 ((pm_location_t) { 0 }),
6756 predicate,
6757 statements
6758 );
6759}
6760
6764static pm_when_node_t *
6765pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6766 return pm_when_node_new(
6767 parser->arena,
6768 ++parser->node_id,
6769 0,
6770 PM_LOCATION_INIT_TOKEN(parser, keyword),
6771 TOK2LOC(parser, keyword),
6772 ((pm_node_list_t) { 0 }),
6773 ((pm_location_t) { 0 }),
6774 NULL
6775 );
6776}
6777
6781static void
6782pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
6783 PM_NODE_LENGTH_SET_NODE(node, condition);
6784 pm_node_list_append(arena, &node->conditions, condition);
6785}
6786
6790static inline void
6791pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
6792 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
6793 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
6794}
6795
6799static void
6800pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6801 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
6802 PM_NODE_LENGTH_SET_NODE(node, statements);
6803 }
6804
6805 node->statements = statements;
6806}
6807
6811static pm_while_node_t *
6812pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6813 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6814
6815 return pm_while_node_new(
6816 parser->arena,
6817 ++parser->node_id,
6818 flags,
6819 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6820 TOK2LOC(parser, keyword),
6821 NTOK2LOC(parser, do_keyword),
6822 TOK2LOC(parser, closing),
6823 predicate,
6824 statements
6825 );
6826}
6827
6831static pm_while_node_t *
6832pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6833 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6834 pm_loop_modifier_block_exits(parser, statements);
6835
6836 return pm_while_node_new(
6837 parser->arena,
6838 ++parser->node_id,
6839 flags,
6840 PM_LOCATION_INIT_NODES(statements, predicate),
6841 TOK2LOC(parser, keyword),
6842 ((pm_location_t) { 0 }),
6843 ((pm_location_t) { 0 }),
6844 predicate,
6845 statements
6846 );
6847}
6848
6852static pm_while_node_t *
6853pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
6854 return pm_while_node_new(
6855 parser->arena,
6856 ++parser->node_id,
6857 0,
6858 PM_LOCATION_INIT_UNSET,
6859 ((pm_location_t) { 0 }),
6860 ((pm_location_t) { 0 }),
6861 ((pm_location_t) { 0 }),
6862 predicate,
6863 statements
6864 );
6865}
6866
6871static pm_x_string_node_t *
6872pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6873 return pm_x_string_node_new(
6874 parser->arena,
6875 ++parser->node_id,
6876 PM_STRING_FLAGS_FROZEN,
6877 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6878 TOK2LOC(parser, opening),
6879 TOK2LOC(parser, content),
6880 TOK2LOC(parser, closing),
6881 *unescaped
6882 );
6883}
6884
6888static inline pm_x_string_node_t *
6889pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6890 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6891}
6892
6896static pm_yield_node_t *
6897pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
6898 uint32_t start = PM_TOKEN_START(parser, keyword);
6899 uint32_t end;
6900
6901 if (rparen_loc->length > 0) {
6902 end = PM_LOCATION_END(rparen_loc);
6903 } else if (arguments != NULL) {
6904 end = PM_NODE_END(arguments);
6905 } else if (lparen_loc->length > 0) {
6906 end = PM_LOCATION_END(lparen_loc);
6907 } else {
6908 end = PM_TOKEN_END(parser, keyword);
6909 }
6910
6911 return pm_yield_node_new(
6912 parser->arena,
6913 ++parser->node_id,
6914 0,
6915 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6916 TOK2LOC(parser, keyword),
6917 *lparen_loc,
6918 arguments,
6919 *rparen_loc
6920 );
6921}
6922
6927static int
6928pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
6929 pm_scope_t *scope = parser->current_scope;
6930 int depth = 0;
6931
6932 while (scope != NULL) {
6933 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
6934 if (scope->closed) break;
6935
6936 scope = scope->previous;
6937 depth++;
6938 }
6939
6940 return -1;
6941}
6942
6948static inline int
6949pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
6950 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
6951}
6952
6956static inline void
6957pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
6958 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
6959}
6960
6964static pm_constant_id_t
6965pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
6966 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
6967 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
6968 return constant_id;
6969}
6970
6974static inline pm_constant_id_t
6975pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
6976 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
6977}
6978
6982static inline pm_constant_id_t
6983pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
6984 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
6985}
6986
6990static pm_constant_id_t
6991pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
6992 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
6993 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
6994 return constant_id;
6995}
6996
7000static pm_constant_id_t
7001pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7002 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7003 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7004 return constant_id;
7005}
7006
7014static bool
7015pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7016 // We want to check whether the parameter name is a numbered parameter or
7017 // not.
7018 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7019
7020 // Otherwise we'll fetch the constant id for the parameter name and check
7021 // whether it's already in the current scope.
7022 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7023
7024 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7025 // Add an error if the parameter doesn't start with _ and has been seen before
7026 if ((name->start < name->end) && (*name->start != '_')) {
7027 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7028 }
7029 return true;
7030 }
7031 return false;
7032}
7033
7037static void
7038pm_parser_scope_pop(pm_parser_t *parser) {
7039 pm_scope_t *scope = parser->current_scope;
7040 parser->current_scope = scope->previous;
7041 pm_locals_free(&scope->locals);
7042 xfree_sized(scope, sizeof(pm_scope_t));
7043}
7044
7045/******************************************************************************/
7046/* Stack helpers */
7047/******************************************************************************/
7048
7052static inline void
7053pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7054 *stack = (*stack << 1) | (value & 1);
7055}
7056
7060static inline void
7061pm_state_stack_pop(pm_state_stack_t *stack) {
7062 *stack >>= 1;
7063}
7064
7068static inline bool
7069pm_state_stack_p(const pm_state_stack_t *stack) {
7070 return *stack & 1;
7071}
7072
7073static inline void
7074pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7075 // Use the negation of the value to prevent stack overflow.
7076 pm_state_stack_push(&parser->accepts_block_stack, !value);
7077}
7078
7079static inline void
7080pm_accepts_block_stack_pop(pm_parser_t *parser) {
7081 pm_state_stack_pop(&parser->accepts_block_stack);
7082}
7083
7084static inline bool
7085pm_accepts_block_stack_p(pm_parser_t *parser) {
7086 return !pm_state_stack_p(&parser->accepts_block_stack);
7087}
7088
7089static inline void
7090pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7091 pm_state_stack_push(&parser->do_loop_stack, value);
7092}
7093
7094static inline void
7095pm_do_loop_stack_pop(pm_parser_t *parser) {
7096 pm_state_stack_pop(&parser->do_loop_stack);
7097}
7098
7099static inline bool
7100pm_do_loop_stack_p(pm_parser_t *parser) {
7101 return pm_state_stack_p(&parser->do_loop_stack);
7102}
7103
7104/******************************************************************************/
7105/* Lexer check helpers */
7106/******************************************************************************/
7107
7112static inline uint8_t
7113peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7114 if (cursor < parser->end) {
7115 return *cursor;
7116 } else {
7117 return '\0';
7118 }
7119}
7120
7126static inline uint8_t
7127peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7128 return peek_at(parser, parser->current.end + offset);
7129}
7130
7135static inline uint8_t
7136peek(const pm_parser_t *parser) {
7137 return peek_at(parser, parser->current.end);
7138}
7139
7144static inline bool
7145match(pm_parser_t *parser, uint8_t value) {
7146 if (peek(parser) == value) {
7147 parser->current.end++;
7148 return true;
7149 }
7150 return false;
7151}
7152
7157static inline size_t
7158match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7159 if (peek_at(parser, cursor) == '\n') {
7160 return 1;
7161 }
7162 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7163 return 2;
7164 }
7165 return 0;
7166}
7167
7173static inline size_t
7174match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7175 return match_eol_at(parser, parser->current.end + offset);
7176}
7177
7183static inline size_t
7184match_eol(pm_parser_t *parser) {
7185 return match_eol_at(parser, parser->current.end);
7186}
7187
/**
 * Return a pointer to the first '\n' within the `length` bytes starting at
 * the cursor, or NULL if there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // Note that it's okay for us to use memchr here to look for \n because none
    // of the encodings that we support have \n as a component of a multi-byte
    // character.
    const uint8_t *newline = memchr(cursor, '\n', span);
    return newline;
}
7200
7204static inline bool
7205ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7206 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7207}
7208
7213static bool
7214parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7215 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7216
7217 if (encoding != NULL) {
7218 if (parser->encoding != encoding) {
7219 parser->encoding = encoding;
7220 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7221 }
7222
7223 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7224 return true;
7225 }
7226
7227 return false;
7228}
7229
7234static void
7235parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7236 const uint8_t *cursor = parser->current.start + 1;
7237 const uint8_t *end = parser->current.end;
7238
7239 bool separator = false;
7240 while (true) {
7241 if (end - cursor <= 6) return;
7242 switch (cursor[6]) {
7243 case 'C': case 'c': cursor += 6; continue;
7244 case 'O': case 'o': cursor += 5; continue;
7245 case 'D': case 'd': cursor += 4; continue;
7246 case 'I': case 'i': cursor += 3; continue;
7247 case 'N': case 'n': cursor += 2; continue;
7248 case 'G': case 'g': cursor += 1; continue;
7249 case '=': case ':':
7250 separator = true;
7251 cursor += 6;
7252 break;
7253 default:
7254 cursor += 6;
7255 if (pm_char_is_whitespace(*cursor)) break;
7256 continue;
7257 }
7258 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7259 separator = false;
7260 }
7261
7262 while (true) {
7263 do {
7264 if (++cursor >= end) return;
7265 } while (pm_char_is_whitespace(*cursor));
7266
7267 if (separator) break;
7268 if (*cursor != '=' && *cursor != ':') return;
7269
7270 separator = true;
7271 cursor++;
7272 }
7273
7274 const uint8_t *value_start = cursor;
7275 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7276
7277 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7278 // If we were unable to parse the encoding value, then we've got an
7279 // issue because we didn't understand the encoding that the user was
7280 // trying to use. In this case we'll keep using the default encoding but
7281 // add an error to the parser to indicate an unsuccessful parse.
7282 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7283 }
7284}
7285
/**
 * The possible outcomes of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
7291
7296static pm_magic_comment_boolean_value_t
7297parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7298 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7299 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7300 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7301 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7302 } else {
7303 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7304 }
7305}
7306
/**
 * Return true if the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7311
7317static inline const uint8_t *
7318parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7319 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7320 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7321 return cursor;
7322 }
7323 cursor++;
7324 }
7325 return NULL;
7326}
7327
7338static inline bool
7339parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7340 bool result = true;
7341
7342 const uint8_t *start = parser->current.start + 1;
7343 const uint8_t *end = parser->current.end;
7344 if (end - start <= 7) return false;
7345
7346 const uint8_t *cursor;
7347 bool indicator = false;
7348
7349 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7350 start = cursor + 3;
7351
7352 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7353 end = cursor;
7354 indicator = true;
7355 } else {
7356 // If we have a start marker but not an end marker, then we cannot
7357 // have a magic comment.
7358 return false;
7359 }
7360 }
7361
7362 cursor = start;
7363 while (cursor < end) {
7364 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7365
7366 const uint8_t *key_start = cursor;
7367 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7368
7369 const uint8_t *key_end = cursor;
7370 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7371 if (cursor == end) break;
7372
7373 if (*cursor == ':') {
7374 cursor++;
7375 } else {
7376 if (!indicator) return false;
7377 continue;
7378 }
7379
7380 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7381 if (cursor == end) break;
7382
7383 const uint8_t *value_start;
7384 const uint8_t *value_end;
7385
7386 if (*cursor == '"') {
7387 value_start = ++cursor;
7388 for (; cursor < end && *cursor != '"'; cursor++) {
7389 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7390 }
7391 value_end = cursor;
7392 if (cursor < end && *cursor == '"') cursor++;
7393 } else {
7394 value_start = cursor;
7395 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7396 value_end = cursor;
7397 }
7398
7399 if (indicator) {
7400 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7401 } else {
7402 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7403 if (cursor != end) return false;
7404 }
7405
7406 // Here, we need to do some processing on the key to swap out dashes for
7407 // underscores. We only need to do this if there _is_ a dash in the key.
7408 pm_string_t key;
7409 const size_t key_length = (size_t) (key_end - key_start);
7410 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7411
7412 if (dash == NULL) {
7413 pm_string_shared_init(&key, key_start, key_end);
7414 } else {
7415 uint8_t *buffer = xmalloc(key_length);
7416 if (buffer == NULL) break;
7417
7418 memcpy(buffer, key_start, key_length);
7419 buffer[dash - key_start] = '_';
7420
7421 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7422 buffer[dash - key_start] = '_';
7423 }
7424
7425 pm_string_owned_init(&key, buffer, key_length);
7426 }
7427
7428 // Finally, we can start checking the key against the list of known
7429 // magic comment keys, and potentially change state based on that.
7430 const uint8_t *key_source = pm_string_source(&key);
7431 uint32_t value_length = (uint32_t) (value_end - value_start);
7432
7433 // We only want to attempt to compare against encoding comments if it's
7434 // the first line in the file (or the second in the case of a shebang).
7435 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7436 if (
7437 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7438 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7439 ) {
7440 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7441 }
7442 }
7443
7444 if (key_length == 11) {
7445 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7446 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7447 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7448 PM_PARSER_WARN_TOKEN_FORMAT(
7449 parser,
7450 &parser->current,
7451 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7452 (int) key_length,
7453 (const char *) key_source,
7454 (int) value_length,
7455 (const char *) value_start
7456 );
7457 break;
7458 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7459 parser->warn_mismatched_indentation = false;
7460 break;
7461 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7462 parser->warn_mismatched_indentation = true;
7463 break;
7464 }
7465 }
7466 } else if (key_length == 21) {
7467 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7468 // We only want to handle frozen string literal comments if it's
7469 // before any semantic tokens have been seen.
7470 if (semantic_token_seen) {
7471 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7472 } else {
7473 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7474 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7475 PM_PARSER_WARN_TOKEN_FORMAT(
7476 parser,
7477 &parser->current,
7478 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7479 (int) key_length,
7480 (const char *) key_source,
7481 (int) value_length,
7482 (const char *) value_start
7483 );
7484 break;
7485 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7487 break;
7488 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7490 break;
7491 }
7492 }
7493 }
7494 } else if (key_length == 24) {
7495 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7496 const uint8_t *cursor = parser->current.start;
7497 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7498
7499 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7500 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7501 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7502 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7503 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7504 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7505 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7506 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7507 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7508 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7509 } else {
7510 PM_PARSER_WARN_TOKEN_FORMAT(
7511 parser,
7512 &parser->current,
7513 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7514 (int) key_length,
7515 (const char *) key_source,
7516 (int) value_length,
7517 (const char *) value_start
7518 );
7519 }
7520 }
7521 }
7522
7523 // When we're done, we want to free the string in case we had to
7524 // allocate memory for it.
7525 pm_string_free(&key);
7526
7527 // Allocate a new magic comment node to append to the parser's list.
7529 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7530 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7531 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7532 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7533 }
7534 }
7535
7536 return result;
7537}
7538
7539/******************************************************************************/
7540/* Context manipulations */
7541/******************************************************************************/
7542
7543static const uint32_t context_terminators[] = {
7544 [PM_CONTEXT_NONE] = 0,
7545 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7546 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7547 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7548 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7549 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7550 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7551 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7552 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7553 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7554 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7555 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7556 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7557 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7558 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7559 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7560 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7561 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7562 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7563 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7564 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7565 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7566 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7567 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7568 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7569 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7570 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7571 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7572 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7573 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7574 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7575 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7576 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7577 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7578 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7579 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7580 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7581 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7582 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7583 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7584 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7585 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7586 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7587 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7588 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7589 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7590 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7591 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7592 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7593 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7594 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7595 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7596 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7597 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7598 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7599};
7600
7601static inline bool
7602context_terminator(pm_context_t context, pm_token_t *token) {
7603 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7604}
7605
7610static pm_context_t
7611context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7612 pm_context_node_t *context_node = parser->current_context;
7613
7614 while (context_node != NULL) {
7615 if (context_terminator(context_node->context, token)) return context_node->context;
7616 context_node = context_node->prev;
7617 }
7618
7619 return PM_CONTEXT_NONE;
7620}
7621
7622static bool
7623context_push(pm_parser_t *parser, pm_context_t context) {
7624 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7625 if (context_node == NULL) return false;
7626
7627 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7628
7629 if (parser->current_context == NULL) {
7630 parser->current_context = context_node;
7631 } else {
7632 context_node->prev = parser->current_context;
7633 parser->current_context = context_node;
7634 }
7635
7636 return true;
7637}
7638
7639static void
7640context_pop(pm_parser_t *parser) {
7641 pm_context_node_t *prev = parser->current_context->prev;
7642 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7643 parser->current_context = prev;
7644}
7645
7646static bool
7647context_p(const pm_parser_t *parser, pm_context_t context) {
7648 pm_context_node_t *context_node = parser->current_context;
7649
7650 while (context_node != NULL) {
7651 if (context_node->context == context) return true;
7652 context_node = context_node->prev;
7653 }
7654
7655 return false;
7656}
7657
7658static bool
7659context_def_p(const pm_parser_t *parser) {
7660 pm_context_node_t *context_node = parser->current_context;
7661
7662 while (context_node != NULL) {
7663 switch (context_node->context) {
7664 case PM_CONTEXT_DEF:
7669 return true;
7670 case PM_CONTEXT_CLASS:
7674 case PM_CONTEXT_MODULE:
7678 case PM_CONTEXT_SCLASS:
7682 return false;
7683 default:
7684 context_node = context_node->prev;
7685 }
7686 }
7687
7688 return false;
7689}
7690
7695static const char *
7696context_human(pm_context_t context) {
7697 switch (context) {
7698 case PM_CONTEXT_NONE:
7699 assert(false && "unreachable");
7700 return "";
7701 case PM_CONTEXT_BEGIN: return "begin statement";
7702 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7703 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7704 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7705 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7706 case PM_CONTEXT_CASE_IN: return "'in' clause";
7707 case PM_CONTEXT_CLASS: return "class definition";
7708 case PM_CONTEXT_DEF: return "method definition";
7709 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7710 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7711 case PM_CONTEXT_DEFINED: return "'defined?' expression";
7712 case PM_CONTEXT_ELSE:
7719 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7720 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7721 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7728 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7729 case PM_CONTEXT_FOR: return "for loop";
7730 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7731 case PM_CONTEXT_IF: return "if statement";
7732 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7733 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7734 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7735 case PM_CONTEXT_MAIN: return "top level context";
7736 case PM_CONTEXT_MODULE: return "module definition";
7737 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7738 case PM_CONTEXT_PARENS: return "parentheses";
7739 case PM_CONTEXT_POSTEXE: return "'END' block";
7740 case PM_CONTEXT_PREDICATE: return "predicate";
7741 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7749 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7750 case PM_CONTEXT_SCLASS: return "singleton class definition";
7751 case PM_CONTEXT_TERNARY: return "ternary expression";
7752 case PM_CONTEXT_UNLESS: return "unless statement";
7753 case PM_CONTEXT_UNTIL: return "until statement";
7754 case PM_CONTEXT_WHILE: return "while statement";
7755 }
7756
7757 assert(false && "unreachable");
7758 return "";
7759}
7760
7761/******************************************************************************/
7762/* Specific token lexers */
7763/******************************************************************************/
7764
7765static inline void
7766pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7767 if (invalid != NULL) {
7768 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7769 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
7770 }
7771}
7772
7773static size_t
7774pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7775 const uint8_t *invalid = NULL;
7776 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7777 pm_strspn_number_validate(parser, string, length, invalid);
7778 return length;
7779}
7780
7781static size_t
7782pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7783 const uint8_t *invalid = NULL;
7784 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7785 pm_strspn_number_validate(parser, string, length, invalid);
7786 return length;
7787}
7788
7789static size_t
7790pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7791 const uint8_t *invalid = NULL;
7792 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7793 pm_strspn_number_validate(parser, string, length, invalid);
7794 return length;
7795}
7796
7797static size_t
7798pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7799 const uint8_t *invalid = NULL;
7800 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7801 pm_strspn_number_validate(parser, string, length, invalid);
7802 return length;
7803}
7804
7805static pm_token_type_t
7806lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7807 pm_token_type_t type = PM_TOKEN_INTEGER;
7808
7809 // Here we're going to attempt to parse the optional decimal portion of a
7810 // float. If it's not there, then it's okay and we'll just continue on.
7811 if (peek(parser) == '.') {
7812 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7813 parser->current.end += 2;
7814 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7815 type = PM_TOKEN_FLOAT;
7816 } else {
7817 // If we had a . and then something else, then it's not a float
7818 // suffix on a number it's a method call or something else.
7819 return type;
7820 }
7821 }
7822
7823 // Here we're going to attempt to parse the optional exponent portion of a
7824 // float. If it's not there, it's okay and we'll just continue on.
7825 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7826 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7827 parser->current.end += 2;
7828
7829 if (pm_char_is_decimal_digit(peek(parser))) {
7830 parser->current.end++;
7831 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7832 } else {
7833 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7834 }
7835 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7836 parser->current.end++;
7837 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7838 } else {
7839 return type;
7840 }
7841
7842 *seen_e = true;
7843 type = PM_TOKEN_FLOAT;
7844 }
7845
7846 return type;
7847}
7848
7849static pm_token_type_t
7850lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
7851 pm_token_type_t type = PM_TOKEN_INTEGER;
7852 *seen_e = false;
7853
7854 if (peek_offset(parser, -1) == '0') {
7855 switch (*parser->current.end) {
7856 // 0d1111 is a decimal number
7857 case 'd':
7858 case 'D':
7859 parser->current.end++;
7860 if (pm_char_is_decimal_digit(peek(parser))) {
7861 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7862 } else {
7863 match(parser, '_');
7864 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
7865 }
7866
7867 break;
7868
7869 // 0b1111 is a binary number
7870 case 'b':
7871 case 'B':
7872 parser->current.end++;
7873 if (pm_char_is_binary_digit(peek(parser))) {
7874 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
7875 } else {
7876 match(parser, '_');
7877 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
7878 }
7879
7880 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
7881 break;
7882
7883 // 0o1111 is an octal number
7884 case 'o':
7885 case 'O':
7886 parser->current.end++;
7887 if (pm_char_is_octal_digit(peek(parser))) {
7888 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
7889 } else {
7890 match(parser, '_');
7891 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
7892 }
7893
7894 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
7895 break;
7896
7897 // 01111 is an octal number
7898 case '_':
7899 case '0':
7900 case '1':
7901 case '2':
7902 case '3':
7903 case '4':
7904 case '5':
7905 case '6':
7906 case '7':
7907 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
7908 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
7909 break;
7910
7911 // 0x1111 is a hexadecimal number
7912 case 'x':
7913 case 'X':
7914 parser->current.end++;
7915 if (pm_char_is_hexadecimal_digit(peek(parser))) {
7916 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
7917 } else {
7918 match(parser, '_');
7919 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
7920 }
7921
7922 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
7923 break;
7924
7925 // 0.xxx is a float
7926 case '.': {
7927 type = lex_optional_float_suffix(parser, seen_e);
7928 break;
7929 }
7930
7931 // 0exxx is a float
7932 case 'e':
7933 case 'E': {
7934 type = lex_optional_float_suffix(parser, seen_e);
7935 break;
7936 }
7937 }
7938 } else {
7939 // If it didn't start with a 0, then we'll lex as far as we can into a
7940 // decimal number.
7941 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7942
7943 // Afterward, we'll lex as far as we can into an optional float suffix.
7944 type = lex_optional_float_suffix(parser, seen_e);
7945 }
7946
7947 // At this point we have a completed number, but we want to provide the user
7948 // with a good experience if they put an additional .xxx fractional
7949 // component on the end, so we'll check for that here.
7950 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7951 const uint8_t *fraction_start = parser->current.end;
7952 const uint8_t *fraction_end = parser->current.end + 2;
7953 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
7954 pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
7955 }
7956
7957 return type;
7958}
7959
7960static pm_token_type_t
7961lex_numeric(pm_parser_t *parser) {
7962 pm_token_type_t type = PM_TOKEN_INTEGER;
7963 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
7964
7965 if (parser->current.end < parser->end) {
7966 bool seen_e = false;
7967 type = lex_numeric_prefix(parser, &seen_e);
7968
7969 const uint8_t *end = parser->current.end;
7970 pm_token_type_t suffix_type = type;
7971
7972 if (type == PM_TOKEN_INTEGER) {
7973 if (match(parser, 'r')) {
7974 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
7975
7976 if (match(parser, 'i')) {
7977 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
7978 }
7979 } else if (match(parser, 'i')) {
7980 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
7981 }
7982 } else {
7983 if (!seen_e && match(parser, 'r')) {
7984 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
7985
7986 if (match(parser, 'i')) {
7987 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
7988 }
7989 } else if (match(parser, 'i')) {
7990 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
7991 }
7992 }
7993
7994 const uint8_t b = peek(parser);
7995 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
7996 parser->current.end = end;
7997 } else {
7998 type = suffix_type;
7999 }
8000 }
8001
8002 return type;
8003}
8004
8005static pm_token_type_t
8006lex_global_variable(pm_parser_t *parser) {
8007 if (parser->current.end >= parser->end) {
8008 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8009 return PM_TOKEN_GLOBAL_VARIABLE;
8010 }
8011
8012 // True if multiple characters are allowed after the declaration of the
8013 // global variable. Not true when it starts with "$-".
8014 bool allow_multiple = true;
8015
8016 switch (*parser->current.end) {
8017 case '~': // $~: match-data
8018 case '*': // $*: argv
8019 case '$': // $$: pid
8020 case '?': // $?: last status
8021 case '!': // $!: error string
8022 case '@': // $@: error position
8023 case '/': // $/: input record separator
8024 case '\\': // $\: output record separator
8025 case ';': // $;: field separator
8026 case ',': // $,: output field separator
8027 case '.': // $.: last read line number
8028 case '=': // $=: ignorecase
8029 case ':': // $:: load path
8030 case '<': // $<: reading filename
8031 case '>': // $>: default output handle
8032 case '\"': // $": already loaded files
8033 parser->current.end++;
8034 return PM_TOKEN_GLOBAL_VARIABLE;
8035
8036 case '&': // $&: last match
8037 case '`': // $`: string before last match
8038 case '\'': // $': string after last match
8039 case '+': // $+: string matches last paren.
8040 parser->current.end++;
8041 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8042
8043 case '0': {
8044 parser->current.end++;
8045 size_t width;
8046
8047 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8048 do {
8049 parser->current.end += width;
8050 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8051
8052 // $0 isn't allowed to be followed by anything.
8053 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8054 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8055 }
8056
8057 return PM_TOKEN_GLOBAL_VARIABLE;
8058 }
8059
8060 case '1':
8061 case '2':
8062 case '3':
8063 case '4':
8064 case '5':
8065 case '6':
8066 case '7':
8067 case '8':
8068 case '9':
8069 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8070 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8071
8072 case '-':
8073 parser->current.end++;
8074 allow_multiple = false;
8076 default: {
8077 size_t width;
8078
8079 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8080 do {
8081 parser->current.end += width;
8082 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8083 } else if (pm_char_is_whitespace(peek(parser))) {
8084 // If we get here, then we have a $ followed by whitespace,
8085 // which is not allowed.
8086 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8087 } else {
8088 // If we get here, then we have a $ followed by something that
8089 // isn't recognized as a global variable.
8090 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8091 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8092 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8093 }
8094
8095 return PM_TOKEN_GLOBAL_VARIABLE;
8096 }
8097 }
8098}
8099
8112static inline pm_token_type_t
8113lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8114 if (memcmp(current_start, value, vlen) == 0) {
8115 pm_lex_state_t last_state = parser->lex_state;
8116
8117 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8118 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8119 } else {
8120 lex_state_set(parser, state);
8121 if (state == PM_LEX_STATE_BEG) {
8122 parser->command_start = true;
8123 }
8124
8125 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8126 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8127 return modifier_type;
8128 }
8129 }
8130
8131 return type;
8132 }
8133
8134 return PM_TOKEN_EOF;
8135}
8136
8137static pm_token_type_t
8138lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8139 // Lex as far as we can into the current identifier.
8140 size_t width;
8141 const uint8_t *end = parser->end;
8142 const uint8_t *current_start = parser->current.start;
8143 const uint8_t *current_end = parser->current.end;
8144 bool encoding_changed = parser->encoding_changed;
8145
8146 if (encoding_changed) {
8147 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8148 current_end += width;
8149 }
8150 } else {
8151 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8152 current_end += width;
8153 }
8154 }
8155 parser->current.end = current_end;
8156
8157 // Now cache the length of the identifier so that we can quickly compare it
8158 // against known keywords.
8159 width = (size_t) (current_end - current_start);
8160
8161 if (current_end < end) {
8162 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8163 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8164 // check if we're returning the defined? keyword or just an identifier.
8165 width++;
8166
8167 if (
8168 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8169 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8170 ) {
8171 // If we're in a position where we can accept a : at the end of an
8172 // identifier, then we'll optionally accept it.
8173 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8174 (void) match(parser, ':');
8175 return PM_TOKEN_LABEL;
8176 }
8177
8178 if (parser->lex_state != PM_LEX_STATE_DOT) {
8179 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8180 return PM_TOKEN_KEYWORD_DEFINED;
8181 }
8182 }
8183
8184 return PM_TOKEN_METHOD_NAME;
8185 }
8186
8187 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8188 // If we're in a position where we can accept a = at the end of an
8189 // identifier, then we'll optionally accept it.
8190 return PM_TOKEN_IDENTIFIER;
8191 }
8192
8193 if (
8194 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8195 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8196 ) {
8197 // If we're in a position where we can accept a : at the end of an
8198 // identifier, then we'll optionally accept it.
8199 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8200 (void) match(parser, ':');
8201 return PM_TOKEN_LABEL;
8202 }
8203 }
8204
8205 if (parser->lex_state != PM_LEX_STATE_DOT) {
8206 pm_token_type_t type;
8207 switch (width) {
8208 case 2:
8209 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8210 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8211 return PM_TOKEN_KEYWORD_DO;
8212 }
8213 if (pm_do_loop_stack_p(parser)) {
8214 return PM_TOKEN_KEYWORD_DO_LOOP;
8215 }
8216 if (!pm_accepts_block_stack_p(parser)) {
8217 return PM_TOKEN_KEYWORD_DO_BLOCK;
8218 }
8219 return PM_TOKEN_KEYWORD_DO;
8220 }
8221
8222 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8223 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8224 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8225 break;
8226 case 3:
8227 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8228 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8229 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8230 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8231 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8232 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8233 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8234 break;
8235 case 4:
8236 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8237 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8238 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8239 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8240 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8241 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8242 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8243 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8244 break;
8245 case 5:
8246 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8247 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8248 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8249 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8250 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8251 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8252 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8253 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8254 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8255 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8256 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8257 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8258 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8259 break;
8260 case 6:
8261 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8262 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8263 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8264 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8265 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8266 break;
8267 case 8:
8268 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8269 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8270 break;
8271 case 12:
8272 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8273 break;
8274 }
8275 }
8276
8277 if (encoding_changed) {
8278 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8279 }
8280 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8281}
8282
8287static bool
8288current_token_starts_line(pm_parser_t *parser) {
8289 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8290}
8291
8306static pm_token_type_t
8307lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8308 // If there is no content following this #, then we're at the end of
8309 // the string and we can safely return string content.
8310 if (pound + 1 >= parser->end) {
8311 parser->current.end = pound + 1;
8312 return PM_TOKEN_STRING_CONTENT;
8313 }
8314
8315 // Now we'll check against the character that follows the #. If it
8316 // constitutes valid interplation, we'll handle that, otherwise we'll return
8317 // 0.
8318 switch (pound[1]) {
8319 case '@': {
8320 // In this case we may have hit an embedded instance or class variable.
8321 if (pound + 2 >= parser->end) {
8322 parser->current.end = pound + 1;
8323 return PM_TOKEN_STRING_CONTENT;
8324 }
8325
8326 // If we're looking at a @ and there's another @, then we'll skip past the
8327 // second @.
8328 const uint8_t *variable = pound + 2;
8329 if (*variable == '@' && pound + 3 < parser->end) variable++;
8330
8331 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8332 // At this point we're sure that we've either hit an embedded instance
8333 // or class variable. In this case we'll first need to check if we've
8334 // already consumed content.
8335 if (pound > parser->current.start) {
8336 parser->current.end = pound;
8337 return PM_TOKEN_STRING_CONTENT;
8338 }
8339
8340 // Otherwise we need to return the embedded variable token
8341 // and then switch to the embedded variable lex mode.
8342 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8343 parser->current.end = pound + 1;
8344 return PM_TOKEN_EMBVAR;
8345 }
8346
8347 // If we didn't get a valid interpolation, then this is just regular
8348 // string content. This is like if we get "#@-". In this case the caller
8349 // should keep lexing.
8350 parser->current.end = pound + 1;
8351 return 0;
8352 }
8353 case '$':
8354 // In this case we may have hit an embedded global variable. If there's
8355 // not enough room, then we'll just return string content.
8356 if (pound + 2 >= parser->end) {
8357 parser->current.end = pound + 1;
8358 return PM_TOKEN_STRING_CONTENT;
8359 }
8360
8361 // This is the character that we're going to check to see if it is the
8362 // start of an identifier that would indicate that this is a global
8363 // variable.
8364 const uint8_t *check = pound + 2;
8365
8366 if (pound[2] == '-') {
8367 if (pound + 3 >= parser->end) {
8368 parser->current.end = pound + 2;
8369 return PM_TOKEN_STRING_CONTENT;
8370 }
8371
8372 check++;
8373 }
8374
8375 // If the character that we're going to check is the start of an
8376 // identifier, or we don't have a - and the character is a decimal number
8377 // or a global name punctuation character, then we've hit an embedded
8378 // global variable.
8379 if (
8380 char_is_identifier_start(parser, check, parser->end - check) ||
8381 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8382 ) {
8383 // In this case we've hit an embedded global variable. First check to
8384 // see if we've already consumed content. If we have, then we need to
8385 // return that content as string content first.
8386 if (pound > parser->current.start) {
8387 parser->current.end = pound;
8388 return PM_TOKEN_STRING_CONTENT;
8389 }
8390
8391 // Otherwise, we need to return the embedded variable token and switch
8392 // to the embedded variable lex mode.
8393 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8394 parser->current.end = pound + 1;
8395 return PM_TOKEN_EMBVAR;
8396 }
8397
8398 // In this case we've hit a #$ that does not indicate a global variable.
8399 // In this case we'll continue lexing past it.
8400 parser->current.end = pound + 1;
8401 return 0;
8402 case '{':
8403 // In this case it's the start of an embedded expression. If we have
8404 // already consumed content, then we need to return that content as string
8405 // content first.
8406 if (pound > parser->current.start) {
8407 parser->current.end = pound;
8408 return PM_TOKEN_STRING_CONTENT;
8409 }
8410
8411 parser->enclosure_nesting++;
8412
8413 // Otherwise we'll skip past the #{ and begin lexing the embedded
8414 // expression.
8415 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8416 parser->current.end = pound + 2;
8417 parser->command_start = true;
8418 pm_do_loop_stack_push(parser, false);
8419 return PM_TOKEN_EMBEXPR_BEGIN;
8420 default:
8421 // In this case we've hit a # that doesn't constitute interpolation. We'll
8422 // mark that by returning the not provided token type. This tells the
8423 // consumer to keep lexing forward.
8424 parser->current.end = pound + 1;
8425 return 0;
8426 }
8427}
8428
/**
 * Bit flags that are threaded through the escape sequence readers below. They
 * are combined with bitwise OR and tested with bitwise AND.
 */

/** No modifier is active. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;

/** A control modifier (\c or \C-) applies to the escaped byte. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;

/** A meta modifier (\M-) applies to the escaped byte. */
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;

/** The escape sequence is being read as part of a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;

/** The escape sequence is being read inside a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
8434
/**
 * Lookup table, indexed by byte value 0x00-0x7F, that says whether the escape
 * reader treats an ASCII byte as printable. In addition to space and the
 * visible characters, the whitespace bytes \t, \n, \v, \f and \r are flagged
 * as printable. Backslash (0x5C) and DEL (0x7F) are not.
 */
static const bool ascii_printable_chars[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, // 0x00-0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20-0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 0x50-0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0  // 0x70-0x7F
};

/**
 * Returns true if the given byte is ASCII (below 0x80) and is flagged as
 * printable in ascii_printable_chars. The range check comes first so that
 * bytes >= 0x80 never index past the end of the 128-entry table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    return (b < 0x80) && ascii_printable_chars[b];
}
8453
/**
 * Convert a single hexadecimal digit character into its numeric value (0-15).
 *
 * The caller must already have checked that the byte is one of 0-9, a-f or
 * A-F; any other byte yields a meaningless result. For the letters, the low
 * three bits are 1..6 in both upper and lower case, so adding 9 maps them to
 * 10..15 without a case distinction.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
8462
8468static inline uint32_t
8469escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
8470 uint32_t value = 0;
8471 for (size_t index = 0; index < length; index++) {
8472 if (index != 0) value <<= 4;
8473 value |= escape_hexadecimal_digit(string[index]);
8474 }
8475
8476 // Here we're going to verify that the value is actually a valid Unicode
8477 // codepoint and not a surrogate pair.
8478 if (value >= 0xD800 && value <= 0xDFFF) {
8479 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8480 // In regexp context, defer the error to regexp encoding
8481 // validation where we can produce a regexp-specific message.
8482 } else if (error_location != NULL) {
8483 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8484 } else {
8485 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8486 }
8487 return 0xFFFD;
8488 }
8489
8490 return value;
8491}
8492
8496static inline uint8_t
8497escape_byte(uint8_t value, const uint8_t flags) {
8498 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8499 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8500 return value;
8501}
8502
8506static inline void
8507escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8508 // \u escape sequences in string-like structures implicitly change the
8509 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8510 // literal.
8511 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8512 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8513 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8514 // In regexp context, suppress this error — the regexp encoding
8515 // validation will produce a more specific error message.
8516 } else {
8517 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8518 }
8519 }
8520
8522 }
8523
8524 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8525 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8526 // In regexp context, defer the error to the regexp encoding
8527 // validation which produces a regexp-specific message.
8528 } else {
8529 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8530 }
8531
8532 pm_buffer_append_byte(buffer, 0xEF);
8533 pm_buffer_append_byte(buffer, 0xBF);
8534 pm_buffer_append_byte(buffer, 0xBD);
8535 }
8536}
8537
8542static inline void
8543escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
8544 if (byte >= 0x80) {
8545 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8546 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8547 // In regexp context, suppress this error — the regexp encoding
8548 // validation will produce a more specific error message.
8549 } else {
8550 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8551 }
8552 }
8553
8554 parser->explicit_encoding = parser->encoding;
8555 }
8556
8557 pm_buffer_append_byte(buffer, byte);
8558}
8559
8575static inline void
8576escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8577 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8578 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8579 }
8580
8581 escape_write_byte_encoded(parser, buffer, flags, byte);
8582}
8583
8587static inline void
8588escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8589 size_t width;
8590 if (parser->encoding_changed) {
8591 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8592 } else {
8593 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8594 }
8595
8596 if (width == 1) {
8597 if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8598 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8599 } else if (width > 1) {
8600 // Valid multibyte character. Just ignore escape.
8601 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8602 pm_buffer_append_bytes(b, parser->current.end, width);
8603 parser->current.end += width;
8604 } else {
8605 // Assume the next character wasn't meant to be part of this escape
8606 // sequence since it is invalid. Add an error and move on.
8607 parser->current.end++;
8608 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8609 }
8610}
8611
8617static void
8618escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8619#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8620
8621 PM_PARSER_WARN_TOKEN_FORMAT(
8622 parser,
8623 &parser->current,
8624 PM_WARN_INVALID_CHARACTER,
8625 FLAG(flags),
8626 FLAG(flag),
8627 type
8628 );
8629
8630#undef FLAG
8631}
8632
8636static void
8637escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8638 uint8_t peeked = peek(parser);
8639 switch (peeked) {
8640 case '\\': {
8641 parser->current.end++;
8642 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8643 return;
8644 }
8645 case '\'': {
8646 parser->current.end++;
8647 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8648 return;
8649 }
8650 case 'a': {
8651 parser->current.end++;
8652 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8653 return;
8654 }
8655 case 'b': {
8656 parser->current.end++;
8657 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8658 return;
8659 }
8660 case 'e': {
8661 parser->current.end++;
8662 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8663 return;
8664 }
8665 case 'f': {
8666 parser->current.end++;
8667 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8668 return;
8669 }
8670 case 'n': {
8671 parser->current.end++;
8672 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8673 return;
8674 }
8675 case 'r': {
8676 parser->current.end++;
8677 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8678 return;
8679 }
8680 case 's': {
8681 parser->current.end++;
8682 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8683 return;
8684 }
8685 case 't': {
8686 parser->current.end++;
8687 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8688 return;
8689 }
8690 case 'v': {
8691 parser->current.end++;
8692 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8693 return;
8694 }
8695 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8696 uint8_t value = (uint8_t) (*parser->current.end - '0');
8697 parser->current.end++;
8698
8699 if (pm_char_is_octal_digit(peek(parser))) {
8700 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8701 parser->current.end++;
8702
8703 if (pm_char_is_octal_digit(peek(parser))) {
8704 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8705 parser->current.end++;
8706 }
8707 }
8708
8709 value = escape_byte(value, flags);
8710 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8711 return;
8712 }
8713 case 'x': {
8714 const uint8_t *start = parser->current.end - 1;
8715
8716 parser->current.end++;
8717 uint8_t byte = peek(parser);
8718
8719 if (pm_char_is_hexadecimal_digit(byte)) {
8720 uint8_t value = escape_hexadecimal_digit(byte);
8721 parser->current.end++;
8722
8723 byte = peek(parser);
8724 if (pm_char_is_hexadecimal_digit(byte)) {
8725 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8726 parser->current.end++;
8727 }
8728
8729 value = escape_byte(value, flags);
8730 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8731 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8732 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8733 } else {
8734 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8735 }
8736 }
8737
8738 escape_write_byte_encoded(parser, buffer, flags, value);
8739 } else {
8740 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8741 }
8742
8743 return;
8744 }
8745 case 'u': {
8746 const uint8_t *start = parser->current.end - 1;
8747 parser->current.end++;
8748
8749 if (parser->current.end == parser->end) {
8750 const uint8_t *start = parser->current.end - 2;
8751 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8752 } else if (peek(parser) == '{') {
8753 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8754 parser->current.end++;
8755
8756 size_t whitespace;
8757 while (true) {
8758 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8759 parser->current.end += whitespace;
8760 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8761 // This is super hacky, but it gets us nicer error
8762 // messages because we can still pass it off to the
8763 // regular expression engine even if we hit an
8764 // unterminated regular expression.
8765 parser->current.end += 2;
8766 } else {
8767 break;
8768 }
8769 }
8770
8771 const uint8_t *extra_codepoints_start = NULL;
8772 int codepoints_count = 0;
8773
8774 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8775 const uint8_t *unicode_start = parser->current.end;
8776 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8777
8778 if (hexadecimal_length > 6) {
8779 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8780 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8781 } else if (hexadecimal_length == 0) {
8782 // there are not hexadecimal characters
8783
8784 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8785 // If this is a regular expression, we are going to
8786 // let the regular expression engine handle this
8787 // error instead of us because we don't know at this
8788 // point if we're inside a comment in /x mode.
8789 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8790 } else {
8791 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
8792 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8793 }
8794
8795 return;
8796 }
8797
8798 parser->current.end += hexadecimal_length;
8799 codepoints_count++;
8800 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8801 extra_codepoints_start = unicode_start;
8802 }
8803
8804 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
8805 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8806
8807 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8808 }
8809
8810 // ?\u{nnnn} character literal should contain only one codepoint
8811 // and cannot be like ?\u{nnnn mmmm}.
8812 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8813 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8814 }
8815
8816 if (parser->current.end == parser->end) {
8817 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8818 } else if (peek(parser) == '}') {
8819 parser->current.end++;
8820 } else {
8821 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8822 // If this is a regular expression, we are going to let
8823 // the regular expression engine handle this error
8824 // instead of us because we don't know at this point if
8825 // we're inside a comment in /x mode.
8826 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8827 } else {
8828 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8829 }
8830 }
8831
8832 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8833 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8834 }
8835 } else {
8836 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8837
8838 if (length == 0) {
8839 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8840 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8841 } else {
8842 const uint8_t *start = parser->current.end - 2;
8843 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8844 }
8845 } else if (length == 4) {
8846 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
8847
8848 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8849 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8850 }
8851
8852 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8853 parser->current.end += 4;
8854 } else {
8855 parser->current.end += length;
8856
8857 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8858 // If this is a regular expression, we are going to let
8859 // the regular expression engine handle this error
8860 // instead of us.
8861 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8862 } else {
8863 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8864 }
8865 }
8866 }
8867
8868 return;
8869 }
8870 case 'c': {
8871 parser->current.end++;
8872 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8873 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8874 }
8875
8876 if (parser->current.end == parser->end) {
8877 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8878 return;
8879 }
8880
8881 uint8_t peeked = peek(parser);
8882 switch (peeked) {
8883 case '?': {
8884 parser->current.end++;
8885 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
8886 return;
8887 }
8888 case '\\':
8889 parser->current.end++;
8890
8891 if (match(parser, 'u') || match(parser, 'U')) {
8892 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
8893 return;
8894 }
8895
8896 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
8897 return;
8898 case ' ':
8899 parser->current.end++;
8900 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
8901 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8902 return;
8903 case '\t':
8904 parser->current.end++;
8905 escape_read_warn(parser, flags, 0, "\\t");
8906 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8907 return;
8908 default: {
8909 if (!char_is_ascii_printable(peeked)) {
8910 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8911 return;
8912 }
8913
8914 if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8915 parser->current.end++;
8916 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8917 return;
8918 }
8919 }
8920 }
8921 case 'C': {
8922 parser->current.end++;
8923 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8924 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8925 }
8926
8927 if (peek(parser) != '-') {
8928 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8929 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
8930 return;
8931 }
8932
8933 parser->current.end++;
8934 if (parser->current.end == parser->end) {
8935 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8936 return;
8937 }
8938
8939 uint8_t peeked = peek(parser);
8940 switch (peeked) {
8941 case '?': {
8942 parser->current.end++;
8943 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
8944 return;
8945 }
8946 case '\\':
8947 parser->current.end++;
8948
8949 if (match(parser, 'u') || match(parser, 'U')) {
8950 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
8951 return;
8952 }
8953
8954 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
8955 return;
8956 case ' ':
8957 parser->current.end++;
8958 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
8959 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8960 return;
8961 case '\t':
8962 parser->current.end++;
8963 escape_read_warn(parser, flags, 0, "\\t");
8964 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8965 return;
8966 default: {
8967 if (!char_is_ascii_printable(peeked)) {
8968 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8969 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
8970 return;
8971 }
8972
8973 if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8974 parser->current.end++;
8975 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8976 return;
8977 }
8978 }
8979 }
8980 case 'M': {
8981 parser->current.end++;
8982 if (flags & PM_ESCAPE_FLAG_META) {
8983 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
8984 }
8985
8986 if (peek(parser) != '-') {
8987 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8988 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
8989 return;
8990 }
8991
8992 parser->current.end++;
8993 if (parser->current.end == parser->end) {
8994 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
8995 return;
8996 }
8997
8998 uint8_t peeked = peek(parser);
8999 switch (peeked) {
9000 case '\\':
9001 parser->current.end++;
9002
9003 if (match(parser, 'u') || match(parser, 'U')) {
9004 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9005 return;
9006 }
9007
9008 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9009 return;
9010 case ' ':
9011 parser->current.end++;
9012 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9013 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9014 return;
9015 case '\t':
9016 parser->current.end++;
9017 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9018 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9019 return;
9020 default:
9021 if (!char_is_ascii_printable(peeked)) {
9022 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9023 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9024 return;
9025 }
9026
9027 if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9028 parser->current.end++;
9029 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9030 return;
9031 }
9032 }
9033 case '\r': {
9034 if (peek_offset(parser, 1) == '\n') {
9035 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
9036 parser->current.end += 2;
9037 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
9038 return;
9039 }
9041 }
9042 default: {
9043 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9044 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9045 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9046 return;
9047 }
9048 if (parser->current.end < parser->end) {
9049 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9050 } else {
9051 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9052 }
9053 return;
9054 }
9055 }
9056}
9057
9083static pm_token_type_t
9084lex_question_mark(pm_parser_t *parser) {
9085 if (lex_state_end_p(parser)) {
9086 lex_state_set(parser, PM_LEX_STATE_BEG);
9087 return PM_TOKEN_QUESTION_MARK;
9088 }
9089
9090 if (parser->current.end >= parser->end) {
9091 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9092 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9093 return PM_TOKEN_CHARACTER_LITERAL;
9094 }
9095
9096 if (pm_char_is_whitespace(*parser->current.end)) {
9097 lex_state_set(parser, PM_LEX_STATE_BEG);
9098 return PM_TOKEN_QUESTION_MARK;
9099 }
9100
9101 lex_state_set(parser, PM_LEX_STATE_BEG);
9102
9103 if (match(parser, '\\')) {
9104 lex_state_set(parser, PM_LEX_STATE_END);
9105
9106 pm_buffer_t buffer;
9107 pm_buffer_init_capacity(&buffer, 3);
9108
9109 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9110
9111 // Copy buffer data into the arena and free the heap buffer.
9112 void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
9113 pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
9114 pm_buffer_free(&buffer);
9115
9116 return PM_TOKEN_CHARACTER_LITERAL;
9117 } else {
9118 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9119
9120 // Ternary operators can have a ? immediately followed by an identifier
9121 // which starts with an underscore. We check for this case here.
9122 if (
9123 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9124 (
9125 (parser->current.end + encoding_width >= parser->end) ||
9126 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9127 )
9128 ) {
9129 lex_state_set(parser, PM_LEX_STATE_END);
9130 parser->current.end += encoding_width;
9131 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9132 return PM_TOKEN_CHARACTER_LITERAL;
9133 }
9134 }
9135
9136 return PM_TOKEN_QUESTION_MARK;
9137}
9138
9143static pm_token_type_t
9144lex_at_variable(pm_parser_t *parser) {
9145 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9146 const uint8_t *end = parser->end;
9147
9148 size_t width;
9149 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9150 parser->current.end += width;
9151
9152 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9153 parser->current.end += width;
9154 }
9155 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9156 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9157 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9158 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9159 }
9160
9161 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9162 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9163 } else {
9164 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9165 pm_parser_err_token(parser, &parser->current, diag_id);
9166 }
9167
9168 // If we're lexing an embedded variable, then we need to pop back into the
9169 // parent lex context.
9170 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9171 lex_mode_pop(parser);
9172 }
9173
9174 return type;
9175}
9176
9180static inline void
9181parser_lex_callback(pm_parser_t *parser) {
9182 if (parser->lex_callback) {
9183 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9184 }
9185}
9186
9190static inline pm_comment_t *
9191parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9192 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9193 if (comment == NULL) return NULL;
9194
9195 *comment = (pm_comment_t) {
9196 .type = type,
9197 .location = TOK2LOC(parser, &parser->current)
9198 };
9199
9200 return comment;
9201}
9202
9208static pm_token_type_t
9209lex_embdoc(pm_parser_t *parser) {
9210 // First, lex out the EMBDOC_BEGIN token.
9211 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9212
9213 if (newline == NULL) {
9214 parser->current.end = parser->end;
9215 } else {
9216 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9217 parser->current.end = newline + 1;
9218 }
9219
9220 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9221 parser_lex_callback(parser);
9222
9223 // Now, create a comment that is going to be attached to the parser.
9224 const uint8_t *comment_start = parser->current.start;
9225 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9226 if (comment == NULL) return PM_TOKEN_EOF;
9227
9228 // Now, loop until we find the end of the embedded documentation or the end
9229 // of the file.
9230 while (parser->current.end + 4 <= parser->end) {
9231 parser->current.start = parser->current.end;
9232
9233 // If we've hit the end of the embedded documentation then we'll return
9234 // that token here.
9235 if (
9236 (memcmp(parser->current.end, "=end", 4) == 0) &&
9237 (
9238 (parser->current.end + 4 == parser->end) || // end of file
9239 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9240 (parser->current.end[4] == '\0') || // NUL or end of script
9241 (parser->current.end[4] == '\004') || // ^D
9242 (parser->current.end[4] == '\032') // ^Z
9243 )
9244 ) {
9245 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9246
9247 if (newline == NULL) {
9248 parser->current.end = parser->end;
9249 } else {
9250 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9251 parser->current.end = newline + 1;
9252 }
9253
9254 parser->current.type = PM_TOKEN_EMBDOC_END;
9255 parser_lex_callback(parser);
9256
9257 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9258 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9259
9260 return PM_TOKEN_EMBDOC_END;
9261 }
9262
9263 // Otherwise, we'll parse until the end of the line and return a line of
9264 // embedded documentation.
9265 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9266
9267 if (newline == NULL) {
9268 parser->current.end = parser->end;
9269 } else {
9270 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9271 parser->current.end = newline + 1;
9272 }
9273
9274 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9275 parser_lex_callback(parser);
9276 }
9277
9278 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9279
9280 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9281 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9282
9283 return PM_TOKEN_EOF;
9284}
9285
9291static inline void
9292parser_lex_ignored_newline(pm_parser_t *parser) {
9293 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9294 parser_lex_callback(parser);
9295}
9296
9306static inline void
9307parser_flush_heredoc_end(pm_parser_t *parser) {
9308 assert(parser->heredoc_end <= parser->end);
9309 parser->next_start = parser->heredoc_end;
9310 parser->heredoc_end = NULL;
9311}
9312
9316static bool
9317parser_end_of_line_p(const pm_parser_t *parser) {
9318 const uint8_t *cursor = parser->current.end;
9319
9320 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9321 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9322 }
9323
9324 return true;
9325}
9326
9345typedef struct {
9351
9356 const uint8_t *cursor;
9358
9378
9382static inline void
9383pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9384 pm_buffer_append_byte(&token_buffer->buffer, byte);
9385}
9386
9387static inline void
9388pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9389 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9390}
9391
9395static inline size_t
9396parser_char_width(const pm_parser_t *parser) {
9397 size_t width;
9398 if (parser->encoding_changed) {
9399 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9400 } else {
9401 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9402 }
9403
9404 // TODO: If the character is invalid in the given encoding, then we'll just
9405 // push one byte into the buffer. This should actually be an error.
9406 return (width == 0 ? 1 : width);
9407}
9408
9412static void
9413pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9414 size_t width = parser_char_width(parser);
9415 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9416 parser->current.end += width;
9417}
9418
9419static void
9420pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9421 size_t width = parser_char_width(parser);
9422 const uint8_t *start = parser->current.end;
9423 pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
9424 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
9425 parser->current.end += width;
9426}
9427
9434static inline void
9435pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9436 // Copy buffer data into the arena and free the heap buffer.
9437 size_t len = pm_buffer_length(&token_buffer->buffer);
9438 void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
9439 pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
9440 pm_buffer_free(&token_buffer->buffer);
9441}
9442
9443static inline void
9444pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9445 pm_token_buffer_copy(parser, &token_buffer->base);
9446 pm_buffer_free(&token_buffer->regexp_buffer);
9447}
9448
9458static void
9459pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9460 if (token_buffer->cursor == NULL) {
9461 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9462 } else {
9463 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9464 pm_token_buffer_copy(parser, token_buffer);
9465 }
9466}
9467
9468static void
9469pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9470 if (token_buffer->base.cursor == NULL) {
9471 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9472 } else {
9473 const uint8_t *cursor = token_buffer->base.cursor;
9474 size_t length = (size_t) (parser->current.end - cursor);
9475 pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
9476 pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
9477 pm_regexp_token_buffer_copy(parser, token_buffer);
9478 }
9479}
9480
9481#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9482
9491static void
9492pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9493 const uint8_t *start;
9494 if (token_buffer->cursor == NULL) {
9495 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9496 start = parser->current.start;
9497 } else {
9498 start = token_buffer->cursor;
9499 }
9500
9501 const uint8_t *end = parser->current.end - 1;
9502 assert(end >= start);
9503 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9504
9505 token_buffer->cursor = end;
9506}
9507
9508static void
9509pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9510 const uint8_t *start;
9511 if (token_buffer->base.cursor == NULL) {
9512 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9513 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9514 start = parser->current.start;
9515 } else {
9516 start = token_buffer->base.cursor;
9517 }
9518
9519 const uint8_t *end = parser->current.end - 1;
9520 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9521 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9522
9523 token_buffer->base.cursor = end;
9524}
9525
9526#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9527
9532static inline size_t
9533pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9534 size_t whitespace = 0;
9535
9536 switch (indent) {
9537 case PM_HEREDOC_INDENT_NONE:
9538 // Do nothing, we can't match a terminator with
9539 // indentation and there's no need to calculate common
9540 // whitespace.
9541 break;
9542 case PM_HEREDOC_INDENT_DASH:
9543 // Skip past inline whitespace.
9544 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9545 break;
9546 case PM_HEREDOC_INDENT_TILDE:
9547 // Skip past inline whitespace and calculate common
9548 // whitespace.
9549 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9550 if (**cursor == '\t') {
9551 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9552 } else {
9553 whitespace++;
9554 }
9555 (*cursor)++;
9556 }
9557
9558 break;
9559 }
9560
9561 return whitespace;
9562}
9563
9568static uint8_t
9569pm_lex_percent_delimiter(pm_parser_t *parser) {
9570 size_t eol_length = match_eol(parser);
9571
9572 if (eol_length) {
9573 if (parser->heredoc_end) {
9574 // If we have already lexed a heredoc, then the newline has already
9575 // been added to the list. In this case we want to just flush the
9576 // heredoc end.
9577 parser_flush_heredoc_end(parser);
9578 } else {
9579 // Otherwise, we'll add the newline to the list of newlines.
9580 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9581 }
9582
9583 uint8_t delimiter = *parser->current.end;
9584
9585 // If our delimiter is \r\n, we want to treat it as if it's \n.
9586 // For example, %\r\nfoo\r\n should be "foo"
9587 if (eol_length == 2) {
9588 delimiter = *(parser->current.end + 1);
9589 }
9590
9591 parser->current.end += eol_length;
9592 return delimiter;
9593 }
9594
9595 return *parser->current.end++;
9596}
9597
/**
 * Set the type of the current token, invoke the lex callback, and return from
 * the enclosing function.
 *
 * This expands to several statements ending in a bare `return`, so it must be
 * used as a complete statement (`LEX(type);`) inside a braced block or switch
 * case, in a function returning void that has a `parser` variable in scope.
 * Never use it as the unbraced body of an `if`/`else`/loop.
 */
#define LEX(token_type) parser->current.type = (token_type); parser_lex_callback(parser); return
9603
9610static void
9611parser_lex(pm_parser_t *parser) {
9612 assert(parser->current.end <= parser->end);
9613 parser->previous = parser->current;
9614
9615 // This value mirrors cmd_state from CRuby.
9616 bool previous_command_start = parser->command_start;
9617 parser->command_start = false;
9618
9619 // This is used to communicate to the newline lexing function that we've
9620 // already seen a comment.
9621 bool lexed_comment = false;
9622
9623 // Here we cache the current value of the semantic token seen flag. This is
9624 // used to reset it in case we find a token that shouldn't flip this flag.
9625 unsigned int semantic_token_seen = parser->semantic_token_seen;
9626 parser->semantic_token_seen = true;
9627
9628 // We'll jump to this label when we are about to encounter an EOF.
9629 // If we still have lex_modes on the stack, we pop them so that cleanup
9630 // can happen. For example, we should still continue parsing after a heredoc
9631 // identifier, even if the heredoc body was syntax invalid.
9632 switch_lex_modes:
9633
9634 switch (parser->lex_modes.current->mode) {
9635 case PM_LEX_DEFAULT:
9636 case PM_LEX_EMBEXPR:
9637 case PM_LEX_EMBVAR:
9638
9639 // We have a specific named label here because we are going to jump back to
9640 // this location in the event that we have lexed a token that should not be
9641 // returned to the parser. This includes comments, ignored newlines, and
9642 // invalid tokens of some form.
9643 lex_next_token: {
9644 // If we have the special next_start pointer set, then we're going to jump
9645 // to that location and start lexing from there.
9646 if (parser->next_start != NULL) {
9647 parser->current.end = parser->next_start;
9648 parser->next_start = NULL;
9649 }
9650
9651 // This value mirrors space_seen from CRuby. It tracks whether or not
9652 // space has been eaten before the start of the next token.
9653 bool space_seen = false;
9654
9655 // First, we're going to skip past any whitespace at the front of the next
9656 // token.
9657 bool chomping = true;
9658 while (parser->current.end < parser->end && chomping) {
9659 switch (*parser->current.end) {
9660 case ' ':
9661 case '\t':
9662 case '\f':
9663 case '\v':
9664 parser->current.end++;
9665 space_seen = true;
9666 break;
9667 case '\r':
9668 if (match_eol_offset(parser, 1)) {
9669 chomping = false;
9670 } else {
9671 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9672 parser->current.end++;
9673 space_seen = true;
9674 }
9675 break;
9676 case '\\': {
9677 size_t eol_length = match_eol_offset(parser, 1);
9678 if (eol_length) {
9679 if (parser->heredoc_end) {
9680 parser->current.end = parser->heredoc_end;
9681 parser->heredoc_end = NULL;
9682 } else {
9683 parser->current.end += eol_length + 1;
9684 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9685 space_seen = true;
9686 }
9687 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9688 parser->current.end += 2;
9689 } else {
9690 chomping = false;
9691 }
9692
9693 break;
9694 }
9695 default:
9696 chomping = false;
9697 break;
9698 }
9699 }
9700
9701 // Next, we'll set to start of this token to be the current end.
9702 parser->current.start = parser->current.end;
9703
9704 // We'll check if we're at the end of the file. If we are, then we
9705 // need to return the EOF token.
9706 if (parser->current.end >= parser->end) {
9707 // We may be missing closing tokens. We should pop modes one by one
9708 // to do the appropriate cleanup like moving next_start for heredocs.
9709 // Only when no mode is remaining will we actually emit the EOF token.
9710 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
9711 lex_mode_pop(parser);
9712 goto switch_lex_modes;
9713 }
9714
9715 // If we hit EOF, but the EOF came immediately after a newline,
9716 // set the start of the token to the newline. This way any EOF
9717 // errors will be reported as happening on that line rather than
9718 // a line after. For example "foo(\n" should report an error
9719 // on line 1 even though EOF technically occurs on line 2.
9720 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9721 parser->current.start -= 1;
9722 }
9723 LEX(PM_TOKEN_EOF);
9724 }
9725
9726 // Finally, we'll check the current character to determine the next
9727 // token.
9728 switch (*parser->current.end++) {
9729 case '\0': // NUL or end of script
9730 case '\004': // ^D
9731 case '\032': // ^Z
9732 parser->current.end--;
9733 LEX(PM_TOKEN_EOF);
9734
9735 case '#': { // comments
9736 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9737 parser->current.end = ending == NULL ? parser->end : ending;
9738
9739 // If we found a comment while lexing, then we're going to
9740 // add it to the list of comments in the file and keep
9741 // lexing.
9742 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9743 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9744
9745 if (ending) parser->current.end++;
9746 parser->current.type = PM_TOKEN_COMMENT;
9747 parser_lex_callback(parser);
9748
9749 // Here, parse the comment to see if it's a magic comment
9750 // and potentially change state on the parser.
9751 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9752 ptrdiff_t length = parser->current.end - parser->current.start;
9753
9754 // If we didn't find a magic comment within the first
9755 // pass and we're at the start of the file, then we need
9756 // to do another pass to potentially find other patterns
9757 // for encoding comments.
9758 if (length >= 10 && !parser->encoding_locked) {
9759 parser_lex_magic_comment_encoding(parser);
9760 }
9761 }
9762
9763 lexed_comment = true;
9764 }
9766 case '\r':
9767 case '\n': {
9768 parser->semantic_token_seen = semantic_token_seen & 0x1;
9769 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9770
9771 if (eol_length) {
9772 // The only way you can have carriage returns in this
9773 // particular loop is if you have a carriage return
9774 // followed by a newline. In that case we'll just skip
9775 // over the carriage return and continue lexing, in
9776 // order to make it so that the newline token
9777 // encapsulates both the carriage return and the
9778 // newline. Note that we need to check that we haven't
9779 // already lexed a comment here because that falls
9780 // through into here as well.
9781 if (!lexed_comment) {
9782 parser->current.end += eol_length - 1; // skip CR
9783 }
9784
9785 if (parser->heredoc_end == NULL) {
9786 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9787 }
9788 }
9789
9790 if (parser->heredoc_end) {
9791 parser_flush_heredoc_end(parser);
9792 }
9793
9794 // If this is an ignored newline, then we can continue lexing after
9795 // calling the callback with the ignored newline token.
9796 switch (lex_state_ignored_p(parser)) {
9797 case PM_IGNORED_NEWLINE_NONE:
9798 break;
9799 case PM_IGNORED_NEWLINE_PATTERN:
9800 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9801 if (!lexed_comment) parser_lex_ignored_newline(parser);
9802 lex_state_set(parser, PM_LEX_STATE_BEG);
9803 parser->command_start = true;
9804 parser->current.type = PM_TOKEN_NEWLINE;
9805 return;
9806 }
9808 case PM_IGNORED_NEWLINE_ALL:
9809 if (!lexed_comment) parser_lex_ignored_newline(parser);
9810 lexed_comment = false;
9811 goto lex_next_token;
9812 }
9813
9814 // Here we need to look ahead and see if there is a call operator
9815 // (either . or &.) that starts the next line. If there is, then this
9816 // is going to become an ignored newline and we're going to instead
9817 // return the call operator.
9818 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9819 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9820
9821 if (next_content < parser->end) {
9822 // If we hit a comment after a newline, then we're going to check
9823 // if it's ignored or if it's followed by a method call ('.').
9824 // If it is, then we're going to call the
9825 // callback with an ignored newline and then continue lexing.
9826 // Otherwise we'll return a regular newline.
9827 if (next_content[0] == '#') {
9828 // Here we look for a "." or "&." following a "\n".
9829 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9830
9831 while (following && (following + 1 < parser->end)) {
9832 following++;
9833 following += pm_strspn_inline_whitespace(following, parser->end - following);
9834
9835 // If this is not followed by a comment, then we can break out
9836 // of this loop.
9837 if (peek_at(parser, following) != '#') break;
9838
9839 // If there is a comment, then we need to find the end of the
9840 // comment and continue searching from there.
9841 following = next_newline(following, parser->end - following);
9842 }
9843
9844 // If the lex state was ignored, we will lex the
9845 // ignored newline.
9846 if (lex_state_ignored_p(parser)) {
9847 if (!lexed_comment) parser_lex_ignored_newline(parser);
9848 lexed_comment = false;
9849 goto lex_next_token;
9850 }
9851
9852 // If we hit a '.' or a '&.' we will lex the ignored
9853 // newline.
9854 if (following && (
9855 (peek_at(parser, following) == '.') ||
9856 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9857 )) {
9858 if (!lexed_comment) parser_lex_ignored_newline(parser);
9859 lexed_comment = false;
9860 goto lex_next_token;
9861 }
9862
9863
9864 // If we are parsing as CRuby 4.0 or later and we
9865 // hit a '&&' or a '||' then we will lex the ignored
9866 // newline.
9867 if (
9869 following && (
9870 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9871 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9872 (
9873 peek_at(parser, following) == 'a' &&
9874 peek_at(parser, following + 1) == 'n' &&
9875 peek_at(parser, following + 2) == 'd' &&
9876 peek_at(parser, next_content + 3) != '!' &&
9877 peek_at(parser, next_content + 3) != '?' &&
9878 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
9879 ) ||
9880 (
9881 peek_at(parser, following) == 'o' &&
9882 peek_at(parser, following + 1) == 'r' &&
9883 peek_at(parser, next_content + 2) != '!' &&
9884 peek_at(parser, next_content + 2) != '?' &&
9885 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
9886 )
9887 )
9888 ) {
9889 if (!lexed_comment) parser_lex_ignored_newline(parser);
9890 lexed_comment = false;
9891 goto lex_next_token;
9892 }
9893 }
9894
9895 // If we hit a . after a newline, then we're in a call chain and
9896 // we need to return the call operator.
9897 if (next_content[0] == '.') {
9898 // To match ripper, we need to emit an ignored newline even though
9899 // it's a real newline in the case that we have a beginless range
9900 // on a subsequent line.
9901 if (peek_at(parser, next_content + 1) == '.') {
9902 if (!lexed_comment) parser_lex_ignored_newline(parser);
9903 lex_state_set(parser, PM_LEX_STATE_BEG);
9904 parser->command_start = true;
9905 parser->current.type = PM_TOKEN_NEWLINE;
9906 return;
9907 }
9908
9909 if (!lexed_comment) parser_lex_ignored_newline(parser);
9910 lex_state_set(parser, PM_LEX_STATE_DOT);
9911 parser->current.start = next_content;
9912 parser->current.end = next_content + 1;
9913 parser->next_start = NULL;
9914 LEX(PM_TOKEN_DOT);
9915 }
9916
9917 // If we hit a &. after a newline, then we're in a call chain and
9918 // we need to return the call operator.
9919 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
9920 if (!lexed_comment) parser_lex_ignored_newline(parser);
9921 lex_state_set(parser, PM_LEX_STATE_DOT);
9922 parser->current.start = next_content;
9923 parser->current.end = next_content + 2;
9924 parser->next_start = NULL;
9925 LEX(PM_TOKEN_AMPERSAND_DOT);
9926 }
9927
9928 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
9929 // If we hit an && then we are in a logical chain
9930 // and we need to return the logical operator.
9931 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
9932 if (!lexed_comment) parser_lex_ignored_newline(parser);
9933 lex_state_set(parser, PM_LEX_STATE_BEG);
9934 parser->current.start = next_content;
9935 parser->current.end = next_content + 2;
9936 parser->next_start = NULL;
9937 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
9938 }
9939
9940 // If we hit a || then we are in a logical chain and
9941 // we need to return the logical operator.
9942 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
9943 if (!lexed_comment) parser_lex_ignored_newline(parser);
9944 lex_state_set(parser, PM_LEX_STATE_BEG);
9945 parser->current.start = next_content;
9946 parser->current.end = next_content + 2;
9947 parser->next_start = NULL;
9948 LEX(PM_TOKEN_PIPE_PIPE);
9949 }
9950
9951 // If we hit an 'and' then we are in a logical chain
9952 // and we need to return the logical operator.
9953 if (
9954 peek_at(parser, next_content) == 'a' &&
9955 peek_at(parser, next_content + 1) == 'n' &&
9956 peek_at(parser, next_content + 2) == 'd' &&
9957 peek_at(parser, next_content + 3) != '!' &&
9958 peek_at(parser, next_content + 3) != '?' &&
9959 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
9960 ) {
9961 if (!lexed_comment) parser_lex_ignored_newline(parser);
9962 lex_state_set(parser, PM_LEX_STATE_BEG);
9963 parser->current.start = next_content;
9964 parser->current.end = next_content + 3;
9965 parser->next_start = NULL;
9966 parser->command_start = true;
9967 LEX(PM_TOKEN_KEYWORD_AND);
9968 }
9969
9970 // If we hit a 'or' then we are in a logical chain
9971 // and we need to return the logical operator.
9972 if (
9973 peek_at(parser, next_content) == 'o' &&
9974 peek_at(parser, next_content + 1) == 'r' &&
9975 peek_at(parser, next_content + 2) != '!' &&
9976 peek_at(parser, next_content + 2) != '?' &&
9977 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
9978 ) {
9979 if (!lexed_comment) parser_lex_ignored_newline(parser);
9980 lex_state_set(parser, PM_LEX_STATE_BEG);
9981 parser->current.start = next_content;
9982 parser->current.end = next_content + 2;
9983 parser->next_start = NULL;
9984 parser->command_start = true;
9985 LEX(PM_TOKEN_KEYWORD_OR);
9986 }
9987 }
9988 }
9989
9990 // At this point we know this is a regular newline, and we can set the
9991 // necessary state and return the token.
9992 lex_state_set(parser, PM_LEX_STATE_BEG);
9993 parser->command_start = true;
9994 parser->current.type = PM_TOKEN_NEWLINE;
9995 if (!lexed_comment) parser_lex_callback(parser);
9996 return;
9997 }
9998
9999 // ,
10000 case ',':
10001 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10002 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10003 }
10004
10005 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10006 LEX(PM_TOKEN_COMMA);
10007
10008 // (
10009 case '(': {
10010 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10011
10012 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10013 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10014 }
10015
10016 parser->enclosure_nesting++;
10017 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10018 pm_do_loop_stack_push(parser, false);
10019 LEX(type);
10020 }
10021
10022 // )
10023 case ')':
10024 parser->enclosure_nesting--;
10025 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10026 pm_do_loop_stack_pop(parser);
10027 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10028
10029 // ;
10030 case ';':
10031 lex_state_set(parser, PM_LEX_STATE_BEG);
10032 parser->command_start = true;
10033 LEX(PM_TOKEN_SEMICOLON);
10034
10035 // [ [] []=
10036 case '[':
10037 parser->enclosure_nesting++;
10038 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10039
10040 if (lex_state_operator_p(parser)) {
10041 if (match(parser, ']')) {
10042 parser->enclosure_nesting--;
10043 lex_state_set(parser, PM_LEX_STATE_ARG);
10044 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10045 }
10046
10047 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10048 LEX(type);
10049 }
10050
10051 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10052 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10053 }
10054
10055 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10056 pm_do_loop_stack_push(parser, false);
10057 LEX(type);
10058
10059 // ]
10060 case ']':
10061 parser->enclosure_nesting--;
10062 lex_state_set(parser, PM_LEX_STATE_END);
10063 pm_do_loop_stack_pop(parser);
10064 LEX(PM_TOKEN_BRACKET_RIGHT);
10065
10066 // {
10067 case '{': {
10068 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10069
10070 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10071 // This { begins a lambda
10072 parser->command_start = true;
10073 lex_state_set(parser, PM_LEX_STATE_BEG);
10074 type = PM_TOKEN_LAMBDA_BEGIN;
10075 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10076 // This { begins a hash literal
10077 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10078 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10079 // This { begins a block
10080 parser->command_start = true;
10081 lex_state_set(parser, PM_LEX_STATE_BEG);
10082 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10083 // This { begins a block on a command
10084 parser->command_start = true;
10085 lex_state_set(parser, PM_LEX_STATE_BEG);
10086 } else {
10087 // This { begins a hash literal
10088 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10089 }
10090
10091 parser->enclosure_nesting++;
10092 parser->brace_nesting++;
10093 pm_do_loop_stack_push(parser, false);
10094
10095 LEX(type);
10096 }
10097
10098 // }
10099 case '}':
10100 parser->enclosure_nesting--;
10101 pm_do_loop_stack_pop(parser);
10102
10103 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10104 lex_mode_pop(parser);
10105 LEX(PM_TOKEN_EMBEXPR_END);
10106 }
10107
10108 parser->brace_nesting--;
10109 lex_state_set(parser, PM_LEX_STATE_END);
10110 LEX(PM_TOKEN_BRACE_RIGHT);
10111
10112 // * ** **= *=
10113 case '*': {
10114 if (match(parser, '*')) {
10115 if (match(parser, '=')) {
10116 lex_state_set(parser, PM_LEX_STATE_BEG);
10117 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10118 }
10119
10120 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10121
10122 if (lex_state_spcarg_p(parser, space_seen)) {
10123 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10124 type = PM_TOKEN_USTAR_STAR;
10125 } else if (lex_state_beg_p(parser)) {
10126 type = PM_TOKEN_USTAR_STAR;
10127 } else if (ambiguous_operator_p(parser, space_seen)) {
10128 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10129 }
10130
10131 if (lex_state_operator_p(parser)) {
10132 lex_state_set(parser, PM_LEX_STATE_ARG);
10133 } else {
10134 lex_state_set(parser, PM_LEX_STATE_BEG);
10135 }
10136
10137 LEX(type);
10138 }
10139
10140 if (match(parser, '=')) {
10141 lex_state_set(parser, PM_LEX_STATE_BEG);
10142 LEX(PM_TOKEN_STAR_EQUAL);
10143 }
10144
10145 pm_token_type_t type = PM_TOKEN_STAR;
10146
10147 if (lex_state_spcarg_p(parser, space_seen)) {
10148 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10149 type = PM_TOKEN_USTAR;
10150 } else if (lex_state_beg_p(parser)) {
10151 type = PM_TOKEN_USTAR;
10152 } else if (ambiguous_operator_p(parser, space_seen)) {
10153 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10154 }
10155
10156 if (lex_state_operator_p(parser)) {
10157 lex_state_set(parser, PM_LEX_STATE_ARG);
10158 } else {
10159 lex_state_set(parser, PM_LEX_STATE_BEG);
10160 }
10161
10162 LEX(type);
10163 }
10164
10165 // ! != !~ !@
10166 case '!':
10167 if (lex_state_operator_p(parser)) {
10168 lex_state_set(parser, PM_LEX_STATE_ARG);
10169 if (match(parser, '@')) {
10170 LEX(PM_TOKEN_BANG);
10171 }
10172 } else {
10173 lex_state_set(parser, PM_LEX_STATE_BEG);
10174 }
10175
10176 if (match(parser, '=')) {
10177 LEX(PM_TOKEN_BANG_EQUAL);
10178 }
10179
10180 if (match(parser, '~')) {
10181 LEX(PM_TOKEN_BANG_TILDE);
10182 }
10183
10184 LEX(PM_TOKEN_BANG);
10185
10186 // = => =~ == === =begin
10187 case '=':
10188 if (
10189 current_token_starts_line(parser) &&
10190 (parser->current.end + 5 <= parser->end) &&
10191 memcmp(parser->current.end, "begin", 5) == 0 &&
10192 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10193 ) {
10194 pm_token_type_t type = lex_embdoc(parser);
10195 if (type == PM_TOKEN_EOF) {
10196 LEX(type);
10197 }
10198
10199 goto lex_next_token;
10200 }
10201
10202 if (lex_state_operator_p(parser)) {
10203 lex_state_set(parser, PM_LEX_STATE_ARG);
10204 } else {
10205 lex_state_set(parser, PM_LEX_STATE_BEG);
10206 }
10207
10208 if (match(parser, '>')) {
10209 LEX(PM_TOKEN_EQUAL_GREATER);
10210 }
10211
10212 if (match(parser, '~')) {
10213 LEX(PM_TOKEN_EQUAL_TILDE);
10214 }
10215
10216 if (match(parser, '=')) {
10217 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10218 }
10219
10220 LEX(PM_TOKEN_EQUAL);
10221
10222 // < << <<= <= <=>
10223 case '<':
10224 if (match(parser, '<')) {
10225 if (
10226 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10227 !lex_state_end_p(parser) &&
10228 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10229 ) {
10230 const uint8_t *end = parser->current.end;
10231
10232 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10233 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10234
10235 if (match(parser, '-')) {
10236 indent = PM_HEREDOC_INDENT_DASH;
10237 }
10238 else if (match(parser, '~')) {
10239 indent = PM_HEREDOC_INDENT_TILDE;
10240 }
10241
10242 if (match(parser, '`')) {
10243 quote = PM_HEREDOC_QUOTE_BACKTICK;
10244 }
10245 else if (match(parser, '"')) {
10246 quote = PM_HEREDOC_QUOTE_DOUBLE;
10247 }
10248 else if (match(parser, '\'')) {
10249 quote = PM_HEREDOC_QUOTE_SINGLE;
10250 }
10251
10252 const uint8_t *ident_start = parser->current.end;
10253 size_t width = 0;
10254
10255 if (parser->current.end >= parser->end) {
10256 parser->current.end = end;
10257 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10258 parser->current.end = end;
10259 } else {
10260 if (quote == PM_HEREDOC_QUOTE_NONE) {
10261 parser->current.end += width;
10262
10263 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10264 parser->current.end += width;
10265 }
10266 } else {
10267 // If we have quotes, then we're going to go until we find the
10268 // end quote.
10269 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10270 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10271 parser->current.end++;
10272 }
10273 }
10274
10275 size_t ident_length = (size_t) (parser->current.end - ident_start);
10276 bool ident_error = false;
10277
10278 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10279 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10280 ident_error = true;
10281 }
10282
10283 parser->explicit_encoding = NULL;
10284 lex_mode_push(parser, (pm_lex_mode_t) {
10285 .mode = PM_LEX_HEREDOC,
10286 .as.heredoc = {
10287 .base = {
10288 .ident_start = ident_start,
10289 .ident_length = ident_length,
10290 .quote = quote,
10291 .indent = indent
10292 },
10293 .next_start = parser->current.end,
10294 .common_whitespace = NULL,
10295 .line_continuation = false
10296 }
10297 });
10298
10299 if (parser->heredoc_end == NULL) {
10300 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10301
10302 if (body_start == NULL) {
10303 // If there is no newline after the heredoc identifier, then
10304 // this is not a valid heredoc declaration. In this case we
10305 // will add an error, but we will still return a heredoc
10306 // start.
10307 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10308 body_start = parser->end;
10309 } else {
10310 // Otherwise, we want to indicate that the body of the
10311 // heredoc starts on the character after the next newline.
10312 pm_line_offset_list_append(&parser->line_offsets, U32(body_start - parser->start + 1));
10313 body_start++;
10314 }
10315
10316 parser->next_start = body_start;
10317 } else {
10318 parser->next_start = parser->heredoc_end;
10319 }
10320
10321 LEX(PM_TOKEN_HEREDOC_START);
10322 }
10323 }
10324
10325 if (match(parser, '=')) {
10326 lex_state_set(parser, PM_LEX_STATE_BEG);
10327 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10328 }
10329
10330 if (ambiguous_operator_p(parser, space_seen)) {
10331 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10332 }
10333
10334 if (lex_state_operator_p(parser)) {
10335 lex_state_set(parser, PM_LEX_STATE_ARG);
10336 } else {
10337 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10338 lex_state_set(parser, PM_LEX_STATE_BEG);
10339 }
10340
10341 LEX(PM_TOKEN_LESS_LESS);
10342 }
10343
10344 if (lex_state_operator_p(parser)) {
10345 lex_state_set(parser, PM_LEX_STATE_ARG);
10346 } else {
10347 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10348 lex_state_set(parser, PM_LEX_STATE_BEG);
10349 }
10350
10351 if (match(parser, '=')) {
10352 if (match(parser, '>')) {
10353 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10354 }
10355
10356 LEX(PM_TOKEN_LESS_EQUAL);
10357 }
10358
10359 LEX(PM_TOKEN_LESS);
10360
10361 // > >> >>= >=
10362 case '>':
10363 if (match(parser, '>')) {
10364 if (lex_state_operator_p(parser)) {
10365 lex_state_set(parser, PM_LEX_STATE_ARG);
10366 } else {
10367 lex_state_set(parser, PM_LEX_STATE_BEG);
10368 }
10369 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10370 }
10371
10372 if (lex_state_operator_p(parser)) {
10373 lex_state_set(parser, PM_LEX_STATE_ARG);
10374 } else {
10375 lex_state_set(parser, PM_LEX_STATE_BEG);
10376 }
10377
10378 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10379
10380 // double-quoted string literal
10381 case '"': {
10382 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10383 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10384 LEX(PM_TOKEN_STRING_BEGIN);
10385 }
10386
10387 // xstring literal
10388 case '`': {
10389 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10390 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10391 LEX(PM_TOKEN_BACKTICK);
10392 }
10393
10394 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10395 if (previous_command_start) {
10396 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10397 } else {
10398 lex_state_set(parser, PM_LEX_STATE_ARG);
10399 }
10400
10401 LEX(PM_TOKEN_BACKTICK);
10402 }
10403
10404 lex_mode_push_string(parser, true, false, '\0', '`');
10405 LEX(PM_TOKEN_BACKTICK);
10406 }
10407
10408 // single-quoted string literal
10409 case '\'': {
10410 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10411 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10412 LEX(PM_TOKEN_STRING_BEGIN);
10413 }
10414
10415 // ? character literal
10416 case '?':
10417 LEX(lex_question_mark(parser));
10418
10419 // & && &&= &=
10420 case '&': {
10421 if (match(parser, '&')) {
10422 lex_state_set(parser, PM_LEX_STATE_BEG);
10423
10424 if (match(parser, '=')) {
10425 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10426 }
10427
10428 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10429 }
10430
10431 if (match(parser, '=')) {
10432 lex_state_set(parser, PM_LEX_STATE_BEG);
10433 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10434 }
10435
10436 if (match(parser, '.')) {
10437 lex_state_set(parser, PM_LEX_STATE_DOT);
10438 LEX(PM_TOKEN_AMPERSAND_DOT);
10439 }
10440
10441 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10442 if (lex_state_spcarg_p(parser, space_seen)) {
10443 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10444 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10445 } else {
10446 const uint8_t delim = peek_offset(parser, 1);
10447
10448 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10449 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10450 }
10451 }
10452
10453 type = PM_TOKEN_UAMPERSAND;
10454 } else if (lex_state_beg_p(parser)) {
10455 type = PM_TOKEN_UAMPERSAND;
10456 } else if (ambiguous_operator_p(parser, space_seen)) {
10457 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10458 }
10459
10460 if (lex_state_operator_p(parser)) {
10461 lex_state_set(parser, PM_LEX_STATE_ARG);
10462 } else {
10463 lex_state_set(parser, PM_LEX_STATE_BEG);
10464 }
10465
10466 LEX(type);
10467 }
10468
10469 // | || ||= |=
10470 case '|':
10471 if (match(parser, '|')) {
10472 if (match(parser, '=')) {
10473 lex_state_set(parser, PM_LEX_STATE_BEG);
10474 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10475 }
10476
10477 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10478 parser->current.end--;
10479 LEX(PM_TOKEN_PIPE);
10480 }
10481
10482 lex_state_set(parser, PM_LEX_STATE_BEG);
10483 LEX(PM_TOKEN_PIPE_PIPE);
10484 }
10485
10486 if (match(parser, '=')) {
10487 lex_state_set(parser, PM_LEX_STATE_BEG);
10488 LEX(PM_TOKEN_PIPE_EQUAL);
10489 }
10490
10491 if (lex_state_operator_p(parser)) {
10492 lex_state_set(parser, PM_LEX_STATE_ARG);
10493 } else {
10494 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10495 }
10496
10497 LEX(PM_TOKEN_PIPE);
10498
10499 // + += +@
10500 case '+': {
10501 if (lex_state_operator_p(parser)) {
10502 lex_state_set(parser, PM_LEX_STATE_ARG);
10503
10504 if (match(parser, '@')) {
10505 LEX(PM_TOKEN_UPLUS);
10506 }
10507
10508 LEX(PM_TOKEN_PLUS);
10509 }
10510
10511 if (match(parser, '=')) {
10512 lex_state_set(parser, PM_LEX_STATE_BEG);
10513 LEX(PM_TOKEN_PLUS_EQUAL);
10514 }
10515
10516 if (
10517 lex_state_beg_p(parser) ||
10518 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10519 ) {
10520 lex_state_set(parser, PM_LEX_STATE_BEG);
10521
10522 if (pm_char_is_decimal_digit(peek(parser))) {
10523 parser->current.end++;
10524 pm_token_type_t type = lex_numeric(parser);
10525 lex_state_set(parser, PM_LEX_STATE_END);
10526 LEX(type);
10527 }
10528
10529 LEX(PM_TOKEN_UPLUS);
10530 }
10531
10532 if (ambiguous_operator_p(parser, space_seen)) {
10533 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10534 }
10535
10536 lex_state_set(parser, PM_LEX_STATE_BEG);
10537 LEX(PM_TOKEN_PLUS);
10538 }
10539
10540 // - -= -@
10541 case '-': {
10542 if (lex_state_operator_p(parser)) {
10543 lex_state_set(parser, PM_LEX_STATE_ARG);
10544
10545 if (match(parser, '@')) {
10546 LEX(PM_TOKEN_UMINUS);
10547 }
10548
10549 LEX(PM_TOKEN_MINUS);
10550 }
10551
10552 if (match(parser, '=')) {
10553 lex_state_set(parser, PM_LEX_STATE_BEG);
10554 LEX(PM_TOKEN_MINUS_EQUAL);
10555 }
10556
10557 if (match(parser, '>')) {
10558 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10559 LEX(PM_TOKEN_MINUS_GREATER);
10560 }
10561
10562 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10563 bool is_beg = lex_state_beg_p(parser);
10564 if (!is_beg && spcarg) {
10565 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10566 }
10567
10568 if (is_beg || spcarg) {
10569 lex_state_set(parser, PM_LEX_STATE_BEG);
10570 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10571 }
10572
10573 if (ambiguous_operator_p(parser, space_seen)) {
10574 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10575 }
10576
10577 lex_state_set(parser, PM_LEX_STATE_BEG);
10578 LEX(PM_TOKEN_MINUS);
10579 }
10580
10581 // . .. ...
10582 case '.': {
10583 bool beg_p = lex_state_beg_p(parser);
10584
10585 if (match(parser, '.')) {
10586 if (match(parser, '.')) {
10587 // If we're _not_ inside a range within default parameters
10588 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10589 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10590 lex_state_set(parser, PM_LEX_STATE_BEG);
10591 } else {
10592 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10593 }
10594 LEX(PM_TOKEN_UDOT_DOT_DOT);
10595 }
10596
10597 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10598 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10599 }
10600
10601 lex_state_set(parser, PM_LEX_STATE_BEG);
10602 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10603 }
10604
10605 lex_state_set(parser, PM_LEX_STATE_BEG);
10606 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10607 }
10608
10609 lex_state_set(parser, PM_LEX_STATE_DOT);
10610 LEX(PM_TOKEN_DOT);
10611 }
10612
10613 // integer
10614 case '0':
10615 case '1':
10616 case '2':
10617 case '3':
10618 case '4':
10619 case '5':
10620 case '6':
10621 case '7':
10622 case '8':
10623 case '9': {
10624 pm_token_type_t type = lex_numeric(parser);
10625 lex_state_set(parser, PM_LEX_STATE_END);
10626 LEX(type);
10627 }
10628
10629 // :: symbol
10630 case ':':
10631 if (match(parser, ':')) {
10632 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10633 lex_state_set(parser, PM_LEX_STATE_BEG);
10634 LEX(PM_TOKEN_UCOLON_COLON);
10635 }
10636
10637 lex_state_set(parser, PM_LEX_STATE_DOT);
10638 LEX(PM_TOKEN_COLON_COLON);
10639 }
10640
10641 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10642 lex_state_set(parser, PM_LEX_STATE_BEG);
10643 LEX(PM_TOKEN_COLON);
10644 }
10645
10646 if (peek(parser) == '"' || peek(parser) == '\'') {
10647 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10648 parser->current.end++;
10649 }
10650
10651 lex_state_set(parser, PM_LEX_STATE_FNAME);
10652 LEX(PM_TOKEN_SYMBOL_BEGIN);
10653
10654 // / /=
10655 case '/':
10656 if (lex_state_beg_p(parser)) {
10657 lex_mode_push_regexp(parser, '\0', '/');
10658 LEX(PM_TOKEN_REGEXP_BEGIN);
10659 }
10660
10661 if (match(parser, '=')) {
10662 lex_state_set(parser, PM_LEX_STATE_BEG);
10663 LEX(PM_TOKEN_SLASH_EQUAL);
10664 }
10665
10666 if (lex_state_spcarg_p(parser, space_seen)) {
10667 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10668 lex_mode_push_regexp(parser, '\0', '/');
10669 LEX(PM_TOKEN_REGEXP_BEGIN);
10670 }
10671
10672 if (ambiguous_operator_p(parser, space_seen)) {
10673 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10674 }
10675
10676 if (lex_state_operator_p(parser)) {
10677 lex_state_set(parser, PM_LEX_STATE_ARG);
10678 } else {
10679 lex_state_set(parser, PM_LEX_STATE_BEG);
10680 }
10681
10682 LEX(PM_TOKEN_SLASH);
10683
10684 // ^ ^=
10685 case '^':
10686 if (lex_state_operator_p(parser)) {
10687 lex_state_set(parser, PM_LEX_STATE_ARG);
10688 } else {
10689 lex_state_set(parser, PM_LEX_STATE_BEG);
10690 }
10691 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10692
10693 // ~ ~@
10694 case '~':
10695 if (lex_state_operator_p(parser)) {
10696 (void) match(parser, '@');
10697 lex_state_set(parser, PM_LEX_STATE_ARG);
10698 } else {
10699 lex_state_set(parser, PM_LEX_STATE_BEG);
10700 }
10701
10702 LEX(PM_TOKEN_TILDE);
10703
10704 // % %= %i %I %q %Q %w %W
10705 case '%': {
10706 // If there is no subsequent character then we have an
10707 // invalid token. We're going to say it's the percent
10708 // operator because we don't want to move into the string
10709 // lex mode unnecessarily.
10710 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10711 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10712 LEX(PM_TOKEN_PERCENT);
10713 }
10714
10715 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10716 lex_state_set(parser, PM_LEX_STATE_BEG);
10717 LEX(PM_TOKEN_PERCENT_EQUAL);
10718 } else if (
10719 lex_state_beg_p(parser) ||
10720 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10721 lex_state_spcarg_p(parser, space_seen)
10722 ) {
10723 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10724 if (*parser->current.end >= 0x80) {
10725 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10726 }
10727
10728 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10729 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10730 LEX(PM_TOKEN_STRING_BEGIN);
10731 }
10732
10733 // Delimiters for %-literals cannot be alphanumeric. We
10734 // validate that here.
10735 uint8_t delimiter = peek_offset(parser, 1);
10736 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10737 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10738 goto lex_next_token;
10739 }
10740
10741 switch (peek(parser)) {
10742 case 'i': {
10743 parser->current.end++;
10744
10745 if (parser->current.end < parser->end) {
10746 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10747 } else {
10748 lex_mode_push_list_eof(parser);
10749 }
10750
10751 LEX(PM_TOKEN_PERCENT_LOWER_I);
10752 }
10753 case 'I': {
10754 parser->current.end++;
10755
10756 if (parser->current.end < parser->end) {
10757 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10758 } else {
10759 lex_mode_push_list_eof(parser);
10760 }
10761
10762 LEX(PM_TOKEN_PERCENT_UPPER_I);
10763 }
10764 case 'r': {
10765 parser->current.end++;
10766
10767 if (parser->current.end < parser->end) {
10768 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10769 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10770 } else {
10771 lex_mode_push_regexp(parser, '\0', '\0');
10772 }
10773
10774 LEX(PM_TOKEN_REGEXP_BEGIN);
10775 }
10776 case 'q': {
10777 parser->current.end++;
10778
10779 if (parser->current.end < parser->end) {
10780 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10781 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10782 } else {
10783 lex_mode_push_string_eof(parser);
10784 }
10785
10786 LEX(PM_TOKEN_STRING_BEGIN);
10787 }
10788 case 'Q': {
10789 parser->current.end++;
10790
10791 if (parser->current.end < parser->end) {
10792 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10793 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10794 } else {
10795 lex_mode_push_string_eof(parser);
10796 }
10797
10798 LEX(PM_TOKEN_STRING_BEGIN);
10799 }
10800 case 's': {
10801 parser->current.end++;
10802
10803 if (parser->current.end < parser->end) {
10804 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10805 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10806 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10807 } else {
10808 lex_mode_push_string_eof(parser);
10809 }
10810
10811 LEX(PM_TOKEN_SYMBOL_BEGIN);
10812 }
10813 case 'w': {
10814 parser->current.end++;
10815
10816 if (parser->current.end < parser->end) {
10817 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10818 } else {
10819 lex_mode_push_list_eof(parser);
10820 }
10821
10822 LEX(PM_TOKEN_PERCENT_LOWER_W);
10823 }
10824 case 'W': {
10825 parser->current.end++;
10826
10827 if (parser->current.end < parser->end) {
10828 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10829 } else {
10830 lex_mode_push_list_eof(parser);
10831 }
10832
10833 LEX(PM_TOKEN_PERCENT_UPPER_W);
10834 }
10835 case 'x': {
10836 parser->current.end++;
10837
10838 if (parser->current.end < parser->end) {
10839 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10840 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10841 } else {
10842 lex_mode_push_string_eof(parser);
10843 }
10844
10845 LEX(PM_TOKEN_PERCENT_LOWER_X);
10846 }
10847 default:
10848 // If we get to this point, then we have a % that is completely
10849 // unparsable. In this case we'll just drop it from the parser
10850 // and skip past it and hope that the next token is something
10851 // that we can parse.
10852 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10853 goto lex_next_token;
10854 }
10855 }
10856
10857 if (ambiguous_operator_p(parser, space_seen)) {
10858 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10859 }
10860
10861 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10862 LEX(PM_TOKEN_PERCENT);
10863 }
10864
10865 // global variable
10866 case '$': {
10867 pm_token_type_t type = lex_global_variable(parser);
10868
10869 // If we're lexing an embedded variable, then we need to pop back into
10870 // the parent lex context.
10871 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10872 lex_mode_pop(parser);
10873 }
10874
10875 lex_state_set(parser, PM_LEX_STATE_END);
10876 LEX(type);
10877 }
10878
10879 // instance variable, class variable
10880 case '@':
10881 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10882 LEX(lex_at_variable(parser));
10883
10884 default: {
10885 if (*parser->current.start != '_') {
10886 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10887
10888 // If this isn't the beginning of an identifier, then
10889 // it's an invalid token as we've exhausted all of the
10890 // other options. We'll skip past it and return the next
10891 // token after adding an appropriate error message.
10892 if (!width) {
10893 if (*parser->current.start >= 0x80) {
10894 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
10895 } else if (*parser->current.start == '\\') {
10896 switch (peek_at(parser, parser->current.start + 1)) {
10897 case ' ':
10898 parser->current.end++;
10899 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
10900 break;
10901 case '\f':
10902 parser->current.end++;
10903 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
10904 break;
10905 case '\t':
10906 parser->current.end++;
10907 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
10908 break;
10909 case '\v':
10910 parser->current.end++;
10911 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
10912 break;
10913 case '\r':
10914 if (peek_at(parser, parser->current.start + 2) != '\n') {
10915 parser->current.end++;
10916 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
10917 break;
10918 }
10920 default:
10921 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
10922 break;
10923 }
10924 } else if (char_is_ascii_printable(*parser->current.start)) {
10925 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
10926 } else {
10927 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
10928 }
10929
10930 goto lex_next_token;
10931 }
10932
10933 parser->current.end = parser->current.start + width;
10934 }
10935
10936 pm_token_type_t type = lex_identifier(parser, previous_command_start);
10937
10938 // If we've hit a __END__ and it was at the start of the
10939 // line or the start of the file and it is followed by
10940 // either a \n or a \r\n, then this is the last token of the
10941 // file.
10942 if (
10943 ((parser->current.end - parser->current.start) == 7) &&
10944 current_token_starts_line(parser) &&
10945 (memcmp(parser->current.start, "__END__", 7) == 0) &&
10946 (parser->current.end == parser->end || match_eol(parser))
10947 ) {
10948 // Since we know we're about to add an __END__ comment,
10949 // we know we need to add all of the newlines to get the
10950 // correct column information for it.
10951 const uint8_t *cursor = parser->current.end;
10952 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
10953 pm_line_offset_list_append(&parser->line_offsets, U32(++cursor - parser->start));
10954 }
10955
10956 parser->current.end = parser->end;
10957 parser->current.type = PM_TOKEN___END__;
10958 parser_lex_callback(parser);
10959
10960 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
10961 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
10962
10963 LEX(PM_TOKEN_EOF);
10964 }
10965
10966 pm_lex_state_t last_state = parser->lex_state;
10967
10968 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
10969 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
10970 if (previous_command_start) {
10971 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10972 } else {
10973 lex_state_set(parser, PM_LEX_STATE_ARG);
10974 }
10975 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
10976 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10977 } else {
10978 lex_state_set(parser, PM_LEX_STATE_END);
10979 }
10980 }
10981
10982 if (
10983 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
10984 (type == PM_TOKEN_IDENTIFIER) &&
10985 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
10986 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
10987 ) {
10988 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
10989 }
10990
10991 LEX(type);
10992 }
10993 }
10994 }
10995 case PM_LEX_LIST: {
10996 if (parser->next_start != NULL) {
10997 parser->current.end = parser->next_start;
10998 parser->next_start = NULL;
10999 }
11000
11001 // First we'll set the beginning of the token.
11002 parser->current.start = parser->current.end;
11003
11004 // If there's any whitespace at the start of the list, then we're
11005 // going to trim it off the beginning and create a new token.
11006 size_t whitespace;
11007
11008 if (parser->heredoc_end) {
11009 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11010 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11011 whitespace += 1;
11012 }
11013 } else {
11014 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11015 }
11016
11017 if (whitespace > 0) {
11018 parser->current.end += whitespace;
11019 if (peek_offset(parser, -1) == '\n') {
11020 // mutates next_start
11021 parser_flush_heredoc_end(parser);
11022 }
11023 LEX(PM_TOKEN_WORDS_SEP);
11024 }
11025
11026 // We'll check if we're at the end of the file. If we are, then we
11027 // need to return the EOF token.
11028 if (parser->current.end >= parser->end) {
11029 LEX(PM_TOKEN_EOF);
11030 }
11031
11032 // Here we'll get a list of the places where strpbrk should break,
11033 // and then find the first one.
11034 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11035 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11036 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11037
11038 // If we haven't found an escape yet, then this buffer will be
11039 // unallocated since we can refer directly to the source string.
11040 pm_token_buffer_t token_buffer = { 0 };
11041
11042 while (breakpoint != NULL) {
11043 // If we hit whitespace, then we must have received content by
11044 // now, so we can return an element of the list.
11045 if (pm_char_is_whitespace(*breakpoint)) {
11046 parser->current.end = breakpoint;
11047 pm_token_buffer_flush(parser, &token_buffer);
11048 LEX(PM_TOKEN_STRING_CONTENT);
11049 }
11050
11051 // If we hit the terminator, we need to check which token to
11052 // return.
11053 if (*breakpoint == lex_mode->as.list.terminator) {
11054 // If this terminator doesn't actually close the list, then
11055 // we need to continue on past it.
11056 if (lex_mode->as.list.nesting > 0) {
11057 parser->current.end = breakpoint + 1;
11058 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11059 lex_mode->as.list.nesting--;
11060 continue;
11061 }
11062
11063 // If we've hit the terminator and we've already skipped
11064 // past content, then we can return a list node.
11065 if (breakpoint > parser->current.start) {
11066 parser->current.end = breakpoint;
11067 pm_token_buffer_flush(parser, &token_buffer);
11068 LEX(PM_TOKEN_STRING_CONTENT);
11069 }
11070
11071 // Otherwise, switch back to the default state and return
11072 // the end of the list.
11073 parser->current.end = breakpoint + 1;
11074 lex_mode_pop(parser);
11075 lex_state_set(parser, PM_LEX_STATE_END);
11076 LEX(PM_TOKEN_STRING_END);
11077 }
11078
11079 // If we hit a null byte, skip directly past it.
11080 if (*breakpoint == '\0') {
11081 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11082 continue;
11083 }
11084
11085 // If we hit escapes, then we need to treat the next token
11086 // literally. In this case we'll skip past the next character
11087 // and find the next breakpoint.
11088 if (*breakpoint == '\\') {
11089 parser->current.end = breakpoint + 1;
11090
11091 // If we've hit the end of the file, then break out of the
11092 // loop by setting the breakpoint to NULL.
11093 if (parser->current.end == parser->end) {
11094 breakpoint = NULL;
11095 continue;
11096 }
11097
11098 pm_token_buffer_escape(parser, &token_buffer);
11099 uint8_t peeked = peek(parser);
11100
11101 switch (peeked) {
11102 case ' ':
11103 case '\f':
11104 case '\t':
11105 case '\v':
11106 case '\\':
11107 pm_token_buffer_push_byte(&token_buffer, peeked);
11108 parser->current.end++;
11109 break;
11110 case '\r':
11111 parser->current.end++;
11112 if (peek(parser) != '\n') {
11113 pm_token_buffer_push_byte(&token_buffer, '\r');
11114 break;
11115 }
11117 case '\n':
11118 pm_token_buffer_push_byte(&token_buffer, '\n');
11119
11120 if (parser->heredoc_end) {
11121 // ... if we are on the same line as a heredoc,
11122 // flush the heredoc and continue parsing after
11123 // heredoc_end.
11124 parser_flush_heredoc_end(parser);
11125 pm_token_buffer_copy(parser, &token_buffer);
11126 LEX(PM_TOKEN_STRING_CONTENT);
11127 } else {
11128 // ... else track the newline.
11129 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11130 }
11131
11132 parser->current.end++;
11133 break;
11134 default:
11135 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11136 pm_token_buffer_push_byte(&token_buffer, peeked);
11137 parser->current.end++;
11138 } else if (lex_mode->as.list.interpolation) {
11139 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11140 } else {
11141 pm_token_buffer_push_byte(&token_buffer, '\\');
11142 pm_token_buffer_push_escaped(&token_buffer, parser);
11143 }
11144
11145 break;
11146 }
11147
11148 token_buffer.cursor = parser->current.end;
11149 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11150 continue;
11151 }
11152
11153 // If we hit a #, then we will attempt to lex interpolation.
11154 if (*breakpoint == '#') {
11155 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11156
11157 if (!type) {
11158 // If we haven't returned at this point then we had something
11159 // that looked like an interpolated class or instance variable
11160 // like "#@" but wasn't actually. In this case we'll just skip
11161 // to the next breakpoint.
11162 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11163 continue;
11164 }
11165
11166 if (type == PM_TOKEN_STRING_CONTENT) {
11167 pm_token_buffer_flush(parser, &token_buffer);
11168 }
11169
11170 LEX(type);
11171 }
11172
11173 // If we've hit the incrementor, then we need to skip past it
11174 // and find the next breakpoint.
11175 assert(*breakpoint == lex_mode->as.list.incrementor);
11176 parser->current.end = breakpoint + 1;
11177 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11178 lex_mode->as.list.nesting++;
11179 continue;
11180 }
11181
11182 if (parser->current.end > parser->current.start) {
11183 pm_token_buffer_flush(parser, &token_buffer);
11184 LEX(PM_TOKEN_STRING_CONTENT);
11185 }
11186
11187 // If we were unable to find a breakpoint, then this token hits the
11188 // end of the file.
11189 parser->current.end = parser->end;
11190 pm_token_buffer_flush(parser, &token_buffer);
11191 LEX(PM_TOKEN_STRING_CONTENT);
11192 }
11193 case PM_LEX_REGEXP: {
 11194 // First, we'll set the start of this token to be the current end.
11195 if (parser->next_start == NULL) {
11196 parser->current.start = parser->current.end;
11197 } else {
11198 parser->current.start = parser->next_start;
11199 parser->current.end = parser->next_start;
11200 parser->next_start = NULL;
11201 }
11202
11203 // We'll check if we're at the end of the file. If we are, then we
11204 // need to return the EOF token.
11205 if (parser->current.end >= parser->end) {
11206 LEX(PM_TOKEN_EOF);
11207 }
11208
11209 // Get a reference to the current mode.
11210 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11211
11212 // These are the places where we need to split up the content of the
11213 // regular expression. We'll use strpbrk to find the first of these
11214 // characters.
11215 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11216 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11217 pm_regexp_token_buffer_t token_buffer = { 0 };
11218
11219 while (breakpoint != NULL) {
11220 uint8_t term = lex_mode->as.regexp.terminator;
11221 bool is_terminator = (*breakpoint == term);
11222
11223 // If the terminator is newline, we need to consider \r\n _also_ a newline
11224 // For example: `%\nfoo\r\n`
11225 // The string should be "foo", not "foo\r"
11226 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11227 if (term == '\n') {
11228 is_terminator = true;
11229 }
11230
11231 // If the terminator is a CR, but we see a CRLF, we need to
11232 // treat the CRLF as a newline, meaning this is _not_ the
11233 // terminator
11234 if (term == '\r') {
11235 is_terminator = false;
11236 }
11237 }
11238
11239 // If we hit the terminator, we need to determine what kind of
11240 // token to return.
11241 if (is_terminator) {
11242 if (lex_mode->as.regexp.nesting > 0) {
11243 parser->current.end = breakpoint + 1;
11244 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11245 lex_mode->as.regexp.nesting--;
11246 continue;
11247 }
11248
11249 // Here we've hit the terminator. If we have already consumed
11250 // content then we need to return that content as string content
11251 // first.
11252 if (breakpoint > parser->current.start) {
11253 parser->current.end = breakpoint;
11254 pm_regexp_token_buffer_flush(parser, &token_buffer);
11255 LEX(PM_TOKEN_STRING_CONTENT);
11256 }
11257
11258 // Check here if we need to track the newline.
11259 size_t eol_length = match_eol_at(parser, breakpoint);
11260 if (eol_length) {
11261 parser->current.end = breakpoint + eol_length;
11262
11263 // Track the newline if we're not in a heredoc that
11264 // would already have added the newline to the
11265 // list.
11266 if (parser->heredoc_end == NULL) {
11267 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11268 }
11269 } else {
11270 parser->current.end = breakpoint + 1;
11271 }
11272
11273 // Since we've hit the terminator of the regular expression,
11274 // we now need to parse the options.
11275 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11276
11277 lex_mode_pop(parser);
11278 lex_state_set(parser, PM_LEX_STATE_END);
11279 LEX(PM_TOKEN_REGEXP_END);
11280 }
11281
11282 // If we've hit the incrementor, then we need to skip past it
11283 // and find the next breakpoint.
11284 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11285 parser->current.end = breakpoint + 1;
11286 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11287 lex_mode->as.regexp.nesting++;
11288 continue;
11289 }
11290
11291 switch (*breakpoint) {
11292 case '\0':
11293 // If we hit a null byte, skip directly past it.
11294 parser->current.end = breakpoint + 1;
11295 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11296 break;
11297 case '\r':
11298 if (peek_at(parser, breakpoint + 1) != '\n') {
11299 parser->current.end = breakpoint + 1;
11300 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11301 break;
11302 }
11303
11304 breakpoint++;
11305 parser->current.end = breakpoint;
11306 pm_regexp_token_buffer_escape(parser, &token_buffer);
11307 token_buffer.base.cursor = breakpoint;
11308
11310 case '\n':
11311 // If we've hit a newline, then we need to track that in
11312 // the list of newlines.
11313 if (parser->heredoc_end == NULL) {
11314 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11315 parser->current.end = breakpoint + 1;
11316 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11317 break;
11318 }
11319
11320 parser->current.end = breakpoint + 1;
11321 parser_flush_heredoc_end(parser);
11322 pm_regexp_token_buffer_flush(parser, &token_buffer);
11323 LEX(PM_TOKEN_STRING_CONTENT);
11324 case '\\': {
11325 // If we hit escapes, then we need to treat the next
11326 // token literally. In this case we'll skip past the
11327 // next character and find the next breakpoint.
11328 parser->current.end = breakpoint + 1;
11329
11330 // If we've hit the end of the file, then break out of
11331 // the loop by setting the breakpoint to NULL.
11332 if (parser->current.end == parser->end) {
11333 breakpoint = NULL;
11334 break;
11335 }
11336
11337 pm_regexp_token_buffer_escape(parser, &token_buffer);
11338 uint8_t peeked = peek(parser);
11339
11340 switch (peeked) {
11341 case '\r':
11342 parser->current.end++;
11343 if (peek(parser) != '\n') {
11344 if (lex_mode->as.regexp.terminator != '\r') {
11345 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11346 }
11347 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11348 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11349 break;
11350 }
11352 case '\n':
11353 if (parser->heredoc_end) {
11354 // ... if we are on the same line as a heredoc,
11355 // flush the heredoc and continue parsing after
11356 // heredoc_end.
11357 parser_flush_heredoc_end(parser);
11358 pm_regexp_token_buffer_copy(parser, &token_buffer);
11359 LEX(PM_TOKEN_STRING_CONTENT);
11360 } else {
11361 // ... else track the newline.
11362 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11363 }
11364
11365 parser->current.end++;
11366 break;
11367 case 'c':
11368 case 'C':
11369 case 'M':
11370 case 'u':
11371 case 'x':
11372 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11373 break;
11374 default:
11375 if (lex_mode->as.regexp.terminator == peeked) {
11376 // Some characters when they are used as the
11377 // terminator also receive an escape. They are
11378 // enumerated here.
11379 switch (peeked) {
11380 case '$': case ')': case '*': case '+':
11381 case '.': case '>': case '?': case ']':
11382 case '^': case '|': case '}':
11383 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11384 break;
11385 default:
11386 break;
11387 }
11388
11389 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11390 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11391 parser->current.end++;
11392 break;
11393 }
11394
11395 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11396 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11397 break;
11398 }
11399
11400 token_buffer.base.cursor = parser->current.end;
11401 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11402 break;
11403 }
11404 case '#': {
11405 // If we hit a #, then we will attempt to lex
11406 // interpolation.
11407 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11408
11409 if (!type) {
11410 // If we haven't returned at this point then we had
11411 // something that looked like an interpolated class or
11412 // instance variable like "#@" but wasn't actually. In
11413 // this case we'll just skip to the next breakpoint.
11414 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11415 break;
11416 }
11417
11418 if (type == PM_TOKEN_STRING_CONTENT) {
11419 pm_regexp_token_buffer_flush(parser, &token_buffer);
11420 }
11421
11422 LEX(type);
11423 }
11424 default:
11425 assert(false && "unreachable");
11426 break;
11427 }
11428 }
11429
11430 if (parser->current.end > parser->current.start) {
11431 pm_regexp_token_buffer_flush(parser, &token_buffer);
11432 LEX(PM_TOKEN_STRING_CONTENT);
11433 }
11434
11435 // If we were unable to find a breakpoint, then this token hits the
11436 // end of the file.
11437 parser->current.end = parser->end;
11438 pm_regexp_token_buffer_flush(parser, &token_buffer);
11439 LEX(PM_TOKEN_STRING_CONTENT);
11440 }
11441 case PM_LEX_STRING: {
11442 // First, we'll set the start of this token to be the current end.
11443 if (parser->next_start == NULL) {
11444 parser->current.start = parser->current.end;
11445 } else {
11446 parser->current.start = parser->next_start;
11447 parser->current.end = parser->next_start;
11448 parser->next_start = NULL;
11449 }
11450
11451 // We'll check if we're at the end of the file. If we are, then we need to
11452 // return the EOF token.
11453 if (parser->current.end >= parser->end) {
11454 LEX(PM_TOKEN_EOF);
11455 }
11456
11457 // These are the places where we need to split up the content of the
11458 // string. We'll use strpbrk to find the first of these characters.
11459 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11460 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11461 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11462
11463 // If we haven't found an escape yet, then this buffer will be
11464 // unallocated since we can refer directly to the source string.
11465 pm_token_buffer_t token_buffer = { 0 };
11466
11467 while (breakpoint != NULL) {
11468 // If we hit the incrementor, then we'll increment the nesting and
11469 // continue lexing.
11470 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11471 lex_mode->as.string.nesting++;
11472 parser->current.end = breakpoint + 1;
11473 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11474 continue;
11475 }
11476
11477 uint8_t term = lex_mode->as.string.terminator;
11478 bool is_terminator = (*breakpoint == term);
11479
11480 // If the terminator is newline, we need to consider \r\n _also_ a newline
11481 // For example: `%\nfoo\r\n`
11482 // The string should be "foo", not "foo\r"
11483 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11484 if (term == '\n') {
11485 is_terminator = true;
11486 }
11487
11488 // If the terminator is a CR, but we see a CRLF, we need to
11489 // treat the CRLF as a newline, meaning this is _not_ the
11490 // terminator
11491 if (term == '\r') {
11492 is_terminator = false;
11493 }
11494 }
11495
11496 // Note that we have to check the terminator here first because we could
11497 // potentially be parsing a % string that has a # character as the
11498 // terminator.
11499 if (is_terminator) {
11500 // If this terminator doesn't actually close the string, then we need
11501 // to continue on past it.
11502 if (lex_mode->as.string.nesting > 0) {
11503 parser->current.end = breakpoint + 1;
11504 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11505 lex_mode->as.string.nesting--;
11506 continue;
11507 }
11508
11509 // Here we've hit the terminator. If we have already consumed content
11510 // then we need to return that content as string content first.
11511 if (breakpoint > parser->current.start) {
11512 parser->current.end = breakpoint;
11513 pm_token_buffer_flush(parser, &token_buffer);
11514 LEX(PM_TOKEN_STRING_CONTENT);
11515 }
11516
11517 // Otherwise we need to switch back to the parent lex mode and
11518 // return the end of the string.
11519 size_t eol_length = match_eol_at(parser, breakpoint);
11520 if (eol_length) {
11521 parser->current.end = breakpoint + eol_length;
11522
11523 // Track the newline if we're not in a heredoc that
11524 // would already have added the newline to the
11525 // list.
11526 if (parser->heredoc_end == NULL) {
11527 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11528 }
11529 } else {
11530 parser->current.end = breakpoint + 1;
11531 }
11532
11533 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11534 parser->current.end++;
11535 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11536 lex_mode_pop(parser);
11537 LEX(PM_TOKEN_LABEL_END);
11538 }
11539
11540 // When the delimiter itself is a newline, we won't
11541 // get a chance to flush heredocs in the usual places since
11542 // the newline is already consumed.
11543 if (term == '\n' && parser->heredoc_end) {
11544 parser_flush_heredoc_end(parser);
11545 }
11546
11547 lex_state_set(parser, PM_LEX_STATE_END);
11548 lex_mode_pop(parser);
11549 LEX(PM_TOKEN_STRING_END);
11550 }
11551
11552 switch (*breakpoint) {
11553 case '\0':
11554 // Skip directly past the null character.
11555 parser->current.end = breakpoint + 1;
11556 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11557 break;
11558 case '\r':
11559 if (peek_at(parser, breakpoint + 1) != '\n') {
11560 parser->current.end = breakpoint + 1;
11561 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11562 break;
11563 }
11564
11565 // If we hit a \r\n sequence, then we need to treat it
11566 // as a newline.
11567 breakpoint++;
11568 parser->current.end = breakpoint;
11569 pm_token_buffer_escape(parser, &token_buffer);
11570 token_buffer.cursor = breakpoint;
11571
11573 case '\n':
11574 // When we hit a newline, we need to flush any potential
11575 // heredocs. Note that this has to happen after we check
11576 // for the terminator in case the terminator is a
11577 // newline character.
11578 if (parser->heredoc_end == NULL) {
11579 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11580 parser->current.end = breakpoint + 1;
11581 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11582 break;
11583 }
11584
11585 parser->current.end = breakpoint + 1;
11586 parser_flush_heredoc_end(parser);
11587 pm_token_buffer_flush(parser, &token_buffer);
11588 LEX(PM_TOKEN_STRING_CONTENT);
11589 case '\\': {
11590 // Here we hit escapes.
11591 parser->current.end = breakpoint + 1;
11592
11593 // If we've hit the end of the file, then break out of
11594 // the loop by setting the breakpoint to NULL.
11595 if (parser->current.end == parser->end) {
11596 breakpoint = NULL;
11597 continue;
11598 }
11599
11600 pm_token_buffer_escape(parser, &token_buffer);
11601 uint8_t peeked = peek(parser);
11602
11603 switch (peeked) {
11604 case '\\':
11605 pm_token_buffer_push_byte(&token_buffer, '\\');
11606 parser->current.end++;
11607 break;
11608 case '\r':
11609 parser->current.end++;
11610 if (peek(parser) != '\n') {
11611 if (!lex_mode->as.string.interpolation) {
11612 pm_token_buffer_push_byte(&token_buffer, '\\');
11613 }
11614 pm_token_buffer_push_byte(&token_buffer, '\r');
11615 break;
11616 }
11618 case '\n':
11619 if (!lex_mode->as.string.interpolation) {
11620 pm_token_buffer_push_byte(&token_buffer, '\\');
11621 pm_token_buffer_push_byte(&token_buffer, '\n');
11622 }
11623
11624 if (parser->heredoc_end) {
11625 // ... if we are on the same line as a heredoc,
11626 // flush the heredoc and continue parsing after
11627 // heredoc_end.
11628 parser_flush_heredoc_end(parser);
11629 pm_token_buffer_copy(parser, &token_buffer);
11630 LEX(PM_TOKEN_STRING_CONTENT);
11631 } else {
11632 // ... else track the newline.
11633 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11634 }
11635
11636 parser->current.end++;
11637 break;
11638 default:
11639 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11640 pm_token_buffer_push_byte(&token_buffer, peeked);
11641 parser->current.end++;
11642 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11643 pm_token_buffer_push_byte(&token_buffer, peeked);
11644 parser->current.end++;
11645 } else if (lex_mode->as.string.interpolation) {
11646 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11647 } else {
11648 pm_token_buffer_push_byte(&token_buffer, '\\');
11649 pm_token_buffer_push_escaped(&token_buffer, parser);
11650 }
11651
11652 break;
11653 }
11654
11655 token_buffer.cursor = parser->current.end;
11656 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11657 break;
11658 }
11659 case '#': {
11660 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11661
11662 if (!type) {
11663 // If we haven't returned at this point then we had something that
11664 // looked like an interpolated class or instance variable like "#@"
11665 // but wasn't actually. In this case we'll just skip to the next
11666 // breakpoint.
11667 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11668 break;
11669 }
11670
11671 if (type == PM_TOKEN_STRING_CONTENT) {
11672 pm_token_buffer_flush(parser, &token_buffer);
11673 }
11674
11675 LEX(type);
11676 }
11677 default:
11678 assert(false && "unreachable");
11679 }
11680 }
11681
11682 if (parser->current.end > parser->current.start) {
11683 pm_token_buffer_flush(parser, &token_buffer);
11684 LEX(PM_TOKEN_STRING_CONTENT);
11685 }
11686
11687 // If we've hit the end of the string, then this is an unterminated
11688 // string. In that case we'll return a string content token.
11689 parser->current.end = parser->end;
11690 pm_token_buffer_flush(parser, &token_buffer);
11691 LEX(PM_TOKEN_STRING_CONTENT);
11692 }
11693 case PM_LEX_HEREDOC: {
11694 // First, we'll set the start of this token.
11695 if (parser->next_start == NULL) {
11696 parser->current.start = parser->current.end;
11697 } else {
11698 parser->current.start = parser->next_start;
11699 parser->current.end = parser->next_start;
11700 parser->heredoc_end = NULL;
11701 parser->next_start = NULL;
11702 }
11703
11704 // Now let's grab the information about the identifier off of the
11705 // current lex mode.
11706 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11707 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11708
11709 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11710 lex_mode->as.heredoc.line_continuation = false;
11711
11712 // We'll check if we're at the end of the file. If we are, then we
11713 // will add an error (because we weren't able to find the
11714 // terminator) but still continue parsing so that content after the
11715 // declaration of the heredoc can be parsed.
11716 if (parser->current.end >= parser->end) {
11717 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11718 parser->next_start = lex_mode->as.heredoc.next_start;
11719 parser->heredoc_end = parser->current.end;
11720 lex_state_set(parser, PM_LEX_STATE_END);
11721 lex_mode_pop(parser);
11722 LEX(PM_TOKEN_HEREDOC_END);
11723 }
11724
11725 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11726 size_t ident_length = heredoc_lex_mode->ident_length;
11727
11728 // If we are immediately following a newline and we have hit the
11729 // terminator, then we need to return the ending of the heredoc.
11730 if (current_token_starts_line(parser)) {
11731 const uint8_t *start = parser->current.start;
11732
11733 if (!line_continuation && (start + ident_length <= parser->end)) {
11734 const uint8_t *newline = next_newline(start, parser->end - start);
11735 const uint8_t *ident_end = newline;
11736 const uint8_t *terminator_end = newline;
11737
11738 if (newline == NULL) {
11739 terminator_end = parser->end;
11740 ident_end = parser->end;
11741 } else {
11742 terminator_end++;
11743 if (newline[-1] == '\r') {
11744 ident_end--; // Remove \r
11745 }
11746 }
11747
11748 const uint8_t *terminator_start = ident_end - ident_length;
11749 const uint8_t *cursor = start;
11750
11751 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11752 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11753 cursor++;
11754 }
11755 }
11756
11757 if (
11758 (cursor == terminator_start) &&
11759 (memcmp(terminator_start, ident_start, ident_length) == 0)
11760 ) {
11761 if (newline != NULL) {
11762 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
11763 }
11764
11765 parser->current.end = terminator_end;
11766 if (*lex_mode->as.heredoc.next_start == '\\') {
11767 parser->next_start = NULL;
11768 } else {
11769 parser->next_start = lex_mode->as.heredoc.next_start;
11770 parser->heredoc_end = parser->current.end;
11771 }
11772
11773 lex_state_set(parser, PM_LEX_STATE_END);
11774 lex_mode_pop(parser);
11775 LEX(PM_TOKEN_HEREDOC_END);
11776 }
11777 }
11778
11779 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11780 if (
11781 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11782 lex_mode->as.heredoc.common_whitespace != NULL &&
11783 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11784 peek_at(parser, start) != '\n'
11785 ) {
11786 *lex_mode->as.heredoc.common_whitespace = whitespace;
11787 }
11788 }
11789
11790 // Otherwise we'll be parsing string content. These are the places
11791 // where we need to split up the content of the heredoc. We'll use
11792 // strpbrk to find the first of these characters.
11793 uint8_t breakpoints[] = "\r\n\\#";
11794
11795 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11796 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11797 breakpoints[3] = '\0';
11798 }
11799
11800 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11801 pm_token_buffer_t token_buffer = { 0 };
11802 bool was_line_continuation = false;
11803
11804 while (breakpoint != NULL) {
11805 switch (*breakpoint) {
11806 case '\0':
11807 // Skip directly past the null character.
11808 parser->current.end = breakpoint + 1;
11809 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11810 break;
11811 case '\r':
11812 parser->current.end = breakpoint + 1;
11813
11814 if (peek_at(parser, breakpoint + 1) != '\n') {
11815 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11816 break;
11817 }
11818
11819 // If we hit a \r\n sequence, then we want to replace it
11820 // with a single \n character in the final string.
11821 breakpoint++;
11822 pm_token_buffer_escape(parser, &token_buffer);
11823 token_buffer.cursor = breakpoint;
11824
11826 case '\n': {
11827 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11828 parser_flush_heredoc_end(parser);
11829 parser->current.end = breakpoint + 1;
11830 pm_token_buffer_flush(parser, &token_buffer);
11831 LEX(PM_TOKEN_STRING_CONTENT);
11832 }
11833
11834 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11835
11836 // If we have a - or ~ heredoc, then we can match after
11837 // some leading whitespace.
11838 const uint8_t *start = breakpoint + 1;
11839
11840 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11841 // We want to match the terminator starting from the end of the line in case
11842 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11843 const uint8_t *newline = next_newline(start, parser->end - start);
11844
11845 if (newline == NULL) {
11846 newline = parser->end;
11847 } else if (newline[-1] == '\r') {
11848 newline--; // Remove \r
11849 }
11850
11851 // Start of a possible terminator.
11852 const uint8_t *terminator_start = newline - ident_length;
11853
11854 // Cursor to check for the leading whitespace. We skip the
11855 // leading whitespace if we have a - or ~ heredoc.
11856 const uint8_t *cursor = start;
11857
11858 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11859 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11860 cursor++;
11861 }
11862 }
11863
11864 if (
11865 cursor == terminator_start &&
11866 (memcmp(terminator_start, ident_start, ident_length) == 0)
11867 ) {
11868 parser->current.end = breakpoint + 1;
11869 pm_token_buffer_flush(parser, &token_buffer);
11870 LEX(PM_TOKEN_STRING_CONTENT);
11871 }
11872 }
11873
11874 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11875
11876 // If we have hit a newline that is followed by a valid
11877 // terminator, then we need to return the content of the
11878 // heredoc here as string content. Then, the next time a
11879 // token is lexed, it will match again and return the
11880 // end of the heredoc.
11881 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11882 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11883 *lex_mode->as.heredoc.common_whitespace = whitespace;
11884 }
11885
11886 parser->current.end = breakpoint + 1;
11887 pm_token_buffer_flush(parser, &token_buffer);
11888 LEX(PM_TOKEN_STRING_CONTENT);
11889 }
11890
11891 // Otherwise we hit a newline and it wasn't followed by
11892 // a terminator, so we can continue parsing.
11893 parser->current.end = breakpoint + 1;
11894 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11895 break;
11896 }
11897 case '\\': {
11898 // If we hit an escape, then we need to skip past
11899 // however many characters the escape takes up. However
11900 // it's important that if \n or \r\n are escaped, we
11901 // stop looping before the newline and not after the
11902 // newline so that we can still potentially find the
11903 // terminator of the heredoc.
11904 parser->current.end = breakpoint + 1;
11905
11906 // If we've hit the end of the file, then break out of
11907 // the loop by setting the breakpoint to NULL.
11908 if (parser->current.end == parser->end) {
11909 breakpoint = NULL;
11910 continue;
11911 }
11912
11913 pm_token_buffer_escape(parser, &token_buffer);
11914 uint8_t peeked = peek(parser);
11915
11916 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11917 switch (peeked) {
11918 case '\r':
11919 parser->current.end++;
11920 if (peek(parser) != '\n') {
11921 pm_token_buffer_push_byte(&token_buffer, '\\');
11922 pm_token_buffer_push_byte(&token_buffer, '\r');
11923 break;
11924 }
11926 case '\n':
11927 pm_token_buffer_push_byte(&token_buffer, '\\');
11928 pm_token_buffer_push_byte(&token_buffer, '\n');
11929 token_buffer.cursor = parser->current.end + 1;
11930 breakpoint = parser->current.end;
11931 continue;
11932 default:
11933 pm_token_buffer_push_byte(&token_buffer, '\\');
11934 pm_token_buffer_push_escaped(&token_buffer, parser);
11935 break;
11936 }
11937 } else {
11938 switch (peeked) {
11939 case '\r':
11940 parser->current.end++;
11941 if (peek(parser) != '\n') {
11942 pm_token_buffer_push_byte(&token_buffer, '\r');
11943 break;
11944 }
11946 case '\n':
11947 // If we are in a tilde here, we should
11948 // break out of the loop and return the
11949 // string content.
11950 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11951 const uint8_t *end = parser->current.end;
11952
11953 if (parser->heredoc_end == NULL) {
11954 pm_line_offset_list_append(&parser->line_offsets, U32(end - parser->start + 1));
11955 }
11956
11957 // Here we want the buffer to only
11958 // include up to the backslash.
11959 parser->current.end = breakpoint;
11960 pm_token_buffer_flush(parser, &token_buffer);
11961
11962 // Now we can advance the end of the
11963 // token past the newline.
11964 parser->current.end = end + 1;
11965 lex_mode->as.heredoc.line_continuation = true;
11966 LEX(PM_TOKEN_STRING_CONTENT);
11967 }
11968
11969 was_line_continuation = true;
11970 token_buffer.cursor = parser->current.end + 1;
11971 breakpoint = parser->current.end;
11972 continue;
11973 default:
11974 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11975 break;
11976 }
11977 }
11978
11979 token_buffer.cursor = parser->current.end;
11980 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11981 break;
11982 }
11983 case '#': {
11984 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11985
11986 if (!type) {
11987 // If we haven't returned at this point then we had
11988 // something that looked like an interpolated class
11989 // or instance variable like "#@" but wasn't
11990 // actually. In this case we'll just skip to the
11991 // next breakpoint.
11992 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11993 break;
11994 }
11995
11996 if (type == PM_TOKEN_STRING_CONTENT) {
11997 pm_token_buffer_flush(parser, &token_buffer);
11998 }
11999
12000 LEX(type);
12001 }
12002 default:
12003 assert(false && "unreachable");
12004 }
12005
12006 was_line_continuation = false;
12007 }
12008
12009 if (parser->current.end > parser->current.start) {
12010 parser->current.end = parser->end;
12011 pm_token_buffer_flush(parser, &token_buffer);
12012 LEX(PM_TOKEN_STRING_CONTENT);
12013 }
12014
12015 // If we've hit the end of the string, then this is an unterminated
12016 // heredoc. In that case we'll return a string content token.
12017 parser->current.end = parser->end;
12018 pm_token_buffer_flush(parser, &token_buffer);
12019 LEX(PM_TOKEN_STRING_CONTENT);
12020 }
12021 }
12022
12023 assert(false && "unreachable");
12024}
12025
12026#undef LEX
12027
12028/******************************************************************************/
12029/* Parse functions */
12030/******************************************************************************/
12031
/**
 * Binding powers (precedence levels) assigned to tokens, listed from the
 * loosest-binding level to the tightest. Consecutive levels are two apart so
 * that a right binding power of "level + 1" (as produced by LEFT_ASSOCIATIVE
 * and NON_ASSOCIATIVE) never collides with the next level up.
 */
typedef enum {
    PM_BINDING_POWER_UNSET = 0,             // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT = 2,
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,   // rescue
    PM_BINDING_POWER_MODIFIER = 6,          // if unless until while
    PM_BINDING_POWER_COMPOSITION = 8,       // and or
    PM_BINDING_POWER_NOT = 10,              // not
    PM_BINDING_POWER_MATCH = 12,            // => in
    PM_BINDING_POWER_DEFINED = 14,          // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT = 18,       // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY = 20,          // ?:
    PM_BINDING_POWER_RANGE = 22,            // .. ...
    PM_BINDING_POWER_LOGICAL_OR = 24,       // ||
    PM_BINDING_POWER_LOGICAL_AND = 26,      // &&
    PM_BINDING_POWER_EQUALITY = 28,         // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON = 30,       // > >= < <=
    PM_BINDING_POWER_BITWISE_OR = 32,       // | ^
    PM_BINDING_POWER_BITWISE_AND = 34,      // &
    PM_BINDING_POWER_SHIFT = 36,            // << >>
    PM_BINDING_POWER_TERM = 38,             // + -
    PM_BINDING_POWER_FACTOR = 40,           // * / %
    PM_BINDING_POWER_UMINUS = 42,           // -@
    PM_BINDING_POWER_EXPONENT = 44,         // **
    PM_BINDING_POWER_UNARY = 46,            // ! ~ +@
    PM_BINDING_POWER_INDEX = 48,            // [] []=
    PM_BINDING_POWER_CALL = 50,             // :: .
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12069
12074typedef struct {
12076 pm_binding_power_t left;
12077
12079 pm_binding_power_t right;
12080
12083
12090
// Brace initializers for entries of the pm_binding_powers table. Each expands
// to four values: the left binding power, the right binding power, a flag that
// is false only for the unary form, and a flag that is true only for the
// non-associative form. The macro parameter is parenthesized so that a
// compound argument (for example a ternary) still yields "argument + 1".

// Assignment operators: left power is UNARY, right power is ASSIGNMENT.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Left-associative: the right side binds one step tighter than the left.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Right-associative: both sides share the same binding power.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Non-associative: same powers as left-associative, with the last flag set.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Right-associative unary: same powers on both sides, third flag cleared.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12096
/*
 * Table of binding powers, indexed by token type. Each listed token gets a
 * pm_binding_powers_t built either from an explicit four-value initializer or
 * from one of the LEFT_ASSOCIATIVE / RIGHT_ASSOCIATIVE / NON_ASSOCIATIVE /
 * RIGHT_ASSOCIATIVE_UNARY / BINDING_POWER_ASSIGNMENT helper macros
 * (LEFT_ASSOCIATIVE and BINDING_POWER_ASSIGNMENT are defined before this part
 * of the file).
 *
 * Token types that are not listed are zero-initialized by the designated
 * initializer rules of C. token_begins_expression_p compares the `left`
 * member of an entry against PM_BINDING_POWER_UNSET to decide whether a token
 * has an entry here.
 * NOTE(review): this presumes PM_BINDING_POWER_UNSET is 0 - confirm.
 *
 * The groups below are written in the order of the PM_BINDING_POWER_*
 * constants they use; the comment above each group shows the operators
 * spelled as they appear in Ruby source.
 */
12097pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12098 // rescue (modifier form); written out by hand because its two powers differ
12099 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12100
12101 // if unless until while (modifier forms)
12102 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12103 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12104 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12105 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12106
12107 // and or
12108 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12109 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12110
12111 // => in
12112 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12113 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12114
12115 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12116 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12117 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12118 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12119 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12120 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12121 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12122 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12123 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12124 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12125 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12126 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12127 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12128 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12129 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12130
12131 // ?:
12132 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12133
12134 // .. ... (the UDOT variants use the logical-or power, not the range power)
12135 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12136 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12137 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12138 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12139
12140 // ||
12141 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12142
12143 // &&
12144 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12145
12146 // != !~ == === =~ <=>
12147 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12148 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12149 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12150 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12151 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12152 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12153
12154 // > >= < <=
12155 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12156 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12157 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12158 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12159
12160 // ^ |
12161 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12162 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12163
12164 // &
12165 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12166
12167 // >> <<
12168 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12169 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12170
12171 // - +
12172 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12173 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12174
12175 // % / * (and the unary star token)
12176 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12177 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12178 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12179 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12180
12181 // -@ (UMINUS_NUM is written out by hand: its second power is PM_BINDING_POWER_MAX)
12182 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12183 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12184
12185 // ** (the unary double-star token uses the unary power)
12186 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12187 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12188
12189 // ! ~ +@
12190 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12191 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12192 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12193
12194 // [
12195 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12196
12197 // :: . &.
12198 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12199 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12200 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12201};
12202
/*
 * The initializer helper macros are only meant for building the
 * pm_binding_powers table, so remove all of them once the table is complete.
 * NON_ASSOCIATIVE is included so that it does not stay defined for the rest
 * of the translation unit.
 */
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
12207
12211static inline bool
12212match1(const pm_parser_t *parser, pm_token_type_t type) {
12213 return parser->current.type == type;
12214}
12215
12219static inline bool
12220match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12221 return match1(parser, type1) || match1(parser, type2);
12222}
12223
12227static inline bool
12228match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12229 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12230}
12231
12235static inline bool
12236match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12237 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12238}
12239
12243static inline bool
12244match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12245 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12246}
12247
12251static inline bool
12252match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12253 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12254}
12255
12262static bool
12263accept1(pm_parser_t *parser, pm_token_type_t type) {
12264 if (match1(parser, type)) {
12265 parser_lex(parser);
12266 return true;
12267 }
12268 return false;
12269}
12270
12275static inline bool
12276accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12277 if (match2(parser, type1, type2)) {
12278 parser_lex(parser);
12279 return true;
12280 }
12281 return false;
12282}
12283
12295static void
12296expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12297 if (accept1(parser, type)) return;
12298
12299 const uint8_t *location = parser->previous.end;
12300 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12301
12302 parser->previous.start = location;
12303 parser->previous.type = 0;
12304}
12305
12310static void
12311expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12312 if (accept2(parser, type1, type2)) return;
12313
12314 const uint8_t *location = parser->previous.end;
12315 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12316
12317 parser->previous.start = location;
12318 parser->previous.type = 0;
12319}
12320
12325static void
12326expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12327 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12328 parser_lex(parser);
12329 } else {
12330 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12331 parser->previous.start = parser->previous.end;
12332 parser->previous.type = 0;
12333 }
12334}
12335
12342static void
12343expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12344 if (accept1(parser, type)) return;
12345
12346 const uint8_t *start = opening->start;
12347 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12348
12349 parser->previous.start = parser->previous.end;
12350 parser->previous.type = 0;
12351}
12352
/*
 * Bit flags carried in the `uint8_t flags` argument of the expression parsing
 * functions. Each flag occupies its own bit so that they can be combined and
 * masked with bitwise operators.
 */
#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) (1u << 0))
#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) (1u << 1))
#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) (1u << 2))
#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) (1u << 3))
12358
/*
 * Forward declaration. The helpers that follow (for example
 * parse_value_expression) call parse_expression before its definition, which
 * is not in this part of the file. `flags` takes a combination of the
 * PM_PARSE_* bits, `diag_id` is a diagnostic identifier supplied by the
 * caller, and callers pass `depth + 1` as `depth` when they recurse.
 */
12359static pm_node_t *
12360parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
12361
12366static pm_node_t *
12367parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12368 pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
12369 pm_assert_value_expression(parser, node);
12370 return node;
12371}
12372
12391static inline bool
12392token_begins_expression_p(pm_token_type_t type) {
12393 switch (type) {
12394 case PM_TOKEN_EQUAL_GREATER:
12395 case PM_TOKEN_KEYWORD_IN:
12396 // We need to special case this because it is a binary operator that
12397 // should not be marked as beginning an expression.
12398 return false;
12399 case PM_TOKEN_BRACE_RIGHT:
12400 case PM_TOKEN_BRACKET_RIGHT:
12401 case PM_TOKEN_COLON:
12402 case PM_TOKEN_COMMA:
12403 case PM_TOKEN_EMBEXPR_END:
12404 case PM_TOKEN_EOF:
12405 case PM_TOKEN_LAMBDA_BEGIN:
12406 case PM_TOKEN_KEYWORD_DO:
12407 case PM_TOKEN_KEYWORD_DO_BLOCK:
12408 case PM_TOKEN_KEYWORD_DO_LOOP:
12409 case PM_TOKEN_KEYWORD_END:
12410 case PM_TOKEN_KEYWORD_ELSE:
12411 case PM_TOKEN_KEYWORD_ELSIF:
12412 case PM_TOKEN_KEYWORD_ENSURE:
12413 case PM_TOKEN_KEYWORD_THEN:
12414 case PM_TOKEN_KEYWORD_RESCUE:
12415 case PM_TOKEN_KEYWORD_WHEN:
12416 case PM_TOKEN_NEWLINE:
12417 case PM_TOKEN_PARENTHESIS_RIGHT:
12418 case PM_TOKEN_SEMICOLON:
12419 // The reason we need this short-circuit is because we're using the
12420 // binding powers table to tell us if the subsequent token could
12421 // potentially be the start of an expression. If there _is_ a binding
12422 // power for one of these tokens, then we should remove it from this list
12423 // and let it be handled by the default case below.
12424 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12425 return false;
12426 case PM_TOKEN_UAMPERSAND:
12427 // This is a special case because this unary operator cannot appear
12428 // as a general operator, it only appears in certain circumstances.
12429 return false;
12430 case PM_TOKEN_UCOLON_COLON:
12431 case PM_TOKEN_UMINUS:
12432 case PM_TOKEN_UMINUS_NUM:
12433 case PM_TOKEN_UPLUS:
12434 case PM_TOKEN_BANG:
12435 case PM_TOKEN_TILDE:
12436 case PM_TOKEN_UDOT_DOT:
12437 case PM_TOKEN_UDOT_DOT_DOT:
12438 // These unary tokens actually do have binding power associated with them
12439 // so that we can correctly place them into the precedence order. But we
12440 // want them to be marked as beginning an expression, so we need to
12441 // special case them here.
12442 return true;
12443 default:
12444 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12445 }
12446}
12447
12452static pm_node_t *
12453parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12454 if (accept1(parser, PM_TOKEN_USTAR)) {
12455 pm_token_t operator = parser->previous;
12456 pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12457 return UP(pm_splat_node_create(parser, &operator, expression));
12458 }
12459
12460 return parse_value_expression(parser, binding_power, flags, diag_id, depth);
12461}
12462
12463static bool
12464pm_node_unreference_each(const pm_node_t *node, void *data) {
12465 switch (PM_NODE_TYPE(node)) {
12466 /* When we are about to destroy a set of nodes that could potentially
12467 * contain block exits for the current scope, we need to check if they
12468 * are contained in the list of block exits and remove them if they are.
12469 */
12470 case PM_BREAK_NODE:
12471 case PM_NEXT_NODE:
12472 case PM_REDO_NODE: {
12473 pm_parser_t *parser = (pm_parser_t *) data;
12474 size_t index = 0;
12475
12476 while (index < parser->current_block_exits->size) {
12477 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12478
12479 if (block_exit == node) {
12480 if (index + 1 < parser->current_block_exits->size) {
12481 memmove(
12482 &parser->current_block_exits->nodes[index],
12483 &parser->current_block_exits->nodes[index + 1],
12484 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12485 );
12486 }
12487 parser->current_block_exits->size--;
12488
12489 /* Note returning true here because these nodes could have
12490 * arguments that are themselves block exits. */
12491 return true;
12492 }
12493
12494 index++;
12495 }
12496
12497 return true;
12498 }
12499 /* When an implicit local variable is written to or targeted, it becomes
12500 * a regular, named local variable. This branch removes it from the list
12501 * of implicit parameters when that happens. */
12502 case PM_LOCAL_VARIABLE_READ_NODE:
12503 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12504 pm_parser_t *parser = (pm_parser_t *) data;
12505 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12506
12507 for (size_t index = 0; index < implicit_parameters->size; index++) {
12508 if (implicit_parameters->nodes[index] == node) {
12509 /* If the node is not the last one in the list, we need to
12510 * shift the remaining nodes down to fill the gap. This is
12511 * extremely unlikely to happen. */
12512 if (index != implicit_parameters->size - 1) {
12513 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12514 }
12515
12516 implicit_parameters->size--;
12517 break;
12518 }
12519 }
12520
12521 return false;
12522 }
12523 default:
12524 return true;
12525 }
12526}
12527
12533static void
12534pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12535 pm_visit_node(node, pm_node_unreference_each, parser);
12536}
12537
12542static void
12543parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12544 // The method name needs to change. If we previously had
12545 // foo, we now need foo=. In this case we'll allocate a new
12546 // owned string, copy the previous method name in, and
12547 // append an =.
12548 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12549 size_t length = constant->length;
12550 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12551 if (name == NULL) return;
12552
12553 memcpy(name, constant->start, length);
12554 name[length] = '=';
12555
12556 // Now switch the name to the new string.
12557 // This silences clang analyzer warning about leak of memory pointed by `name`.
12558 // NOLINTNEXTLINE(clang-analyzer-*)
12559 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12560}
12561
12568static pm_node_t *
12569parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12570 switch (PM_NODE_TYPE(target)) {
12571 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12572 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12573 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12574 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12575 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12576 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12577 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12578 default: break;
12579 }
12580
12581 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12582 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12583
12584 return UP(result);
12585}
12586
12595static pm_node_t *
12596parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12597 switch (PM_NODE_TYPE(target)) {
12598 case PM_MISSING_NODE:
12599 return target;
12600 case PM_SOURCE_ENCODING_NODE:
12601 case PM_FALSE_NODE:
12602 case PM_SOURCE_FILE_NODE:
12603 case PM_SOURCE_LINE_NODE:
12604 case PM_NIL_NODE:
12605 case PM_SELF_NODE:
12606 case PM_TRUE_NODE: {
12607 // In these special cases, we have specific error messages and we
12608 // will replace them with local variable writes.
12609 return parse_unwriteable_target(parser, target);
12610 }
12611 case PM_CLASS_VARIABLE_READ_NODE:
12613 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12614 return target;
12615 case PM_CONSTANT_PATH_NODE:
12616 if (context_def_p(parser)) {
12617 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12618 }
12619
12621 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12622
12623 return target;
12624 case PM_CONSTANT_READ_NODE:
12625 if (context_def_p(parser)) {
12626 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12627 }
12628
12629 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12630 target->type = PM_CONSTANT_TARGET_NODE;
12631
12632 return target;
12633 case PM_BACK_REFERENCE_READ_NODE:
12634 case PM_NUMBERED_REFERENCE_READ_NODE:
12635 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12636 return target;
12637 case PM_GLOBAL_VARIABLE_READ_NODE:
12639 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12640 return target;
12641 case PM_LOCAL_VARIABLE_READ_NODE: {
12642 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12643 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
12644 pm_node_unreference(parser, target);
12645 }
12646
12647 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12648 uint32_t name = cast->name;
12649 uint32_t depth = cast->depth;
12650 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12651
12653 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12654
12655 return target;
12656 }
12657 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12658 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12659 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12660
12661 pm_node_unreference(parser, target);
12662
12663 return node;
12664 }
12665 case PM_INSTANCE_VARIABLE_READ_NODE:
12667 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12668 return target;
12669 case PM_MULTI_TARGET_NODE:
12670 if (splat_parent) {
12671 // Multi target is not accepted in all positions. If this is one
12672 // of them, then we need to add an error.
12673 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12674 }
12675
12676 return target;
12677 case PM_SPLAT_NODE: {
12678 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12679
12680 if (splat->expression != NULL) {
12681 splat->expression = parse_target(parser, splat->expression, multiple, true);
12682 }
12683
12684 return UP(splat);
12685 }
12686 case PM_CALL_NODE: {
12687 pm_call_node_t *call = (pm_call_node_t *) target;
12688
12689 // If we have no arguments to the call node and we need this to be a
12690 // target then this is either a method call or a local variable
12691 // write.
12692 if (
12693 (call->message_loc.length > 0) &&
12694 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
12695 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
12696 (call->opening_loc.length == 0) &&
12697 (call->arguments == NULL) &&
12698 (call->block == NULL)
12699 ) {
12700 if (call->receiver == NULL) {
12701 // When we get here, we have a local variable write, because it
12702 // was previously marked as a method call but now we have an =.
12703 // This looks like:
12704 //
12705 // foo = 1
12706 //
12707 // When it was parsed in the prefix position, foo was seen as a
12708 // method call with no receiver and no arguments. Now we have an
12709 // =, so we know it's a local variable write.
12710 pm_location_t message_loc = call->message_loc;
12711 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
12712
12713 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12714 }
12715
12716 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
12717 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12718 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12719 }
12720
12721 parse_write_name(parser, &call->name);
12722 return UP(pm_call_target_node_create(parser, call));
12723 }
12724 }
12725
12726 // If there is no call operator and the message is "[]" then this is
12727 // an aref expression, and we can transform it into an aset
12728 // expression.
12729 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12730 return UP(pm_index_target_node_create(parser, call));
12731 }
12732 }
12734 default:
12735 // In this case we have a node that we don't know how to convert
12736 // into a target. We need to treat it as an error. For now, we'll
12737 // mark it as an error and just skip right past it.
12738 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12739 return target;
12740 }
12741}
12742
12747static pm_node_t *
12748parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12749 pm_node_t *result = parse_target(parser, target, multiple, false);
12750
12751 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12752 // parens after the targets.
12753 if (
12754 !match1(parser, PM_TOKEN_EQUAL) &&
12755 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12756 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12757 ) {
12758 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12759 }
12760
12761 return result;
12762}
12763
12768static pm_node_t *
12769parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12770 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12771
12772 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12773 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12774 }
12775
12776 return write;
12777}
12778
12782static pm_node_t *
12783parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12784 switch (PM_NODE_TYPE(target)) {
12785 case PM_MISSING_NODE:
12786 return target;
12787 case PM_CLASS_VARIABLE_READ_NODE: {
12788 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12789 return UP(node);
12790 }
12791 case PM_CONSTANT_PATH_NODE: {
12792 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12793
12794 if (context_def_p(parser)) {
12795 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12796 }
12797
12798 return parse_shareable_constant_write(parser, node);
12799 }
12800 case PM_CONSTANT_READ_NODE: {
12801 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12802
12803 if (context_def_p(parser)) {
12804 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12805 }
12806
12807 return parse_shareable_constant_write(parser, node);
12808 }
12809 case PM_BACK_REFERENCE_READ_NODE:
12810 case PM_NUMBERED_REFERENCE_READ_NODE:
12811 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12813 case PM_GLOBAL_VARIABLE_READ_NODE: {
12814 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12815 return UP(node);
12816 }
12817 case PM_LOCAL_VARIABLE_READ_NODE: {
12819
12820 pm_location_t location = target->location;
12821 pm_constant_id_t name = local_read->name;
12822 uint32_t depth = local_read->depth;
12823 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12824
12825 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12826 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12827 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
12828 pm_node_unreference(parser, target);
12829 }
12830
12831 pm_locals_unread(&scope->locals, name);
12832
12833 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
12834 }
12835 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12836 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12837 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12838
12839 pm_node_unreference(parser, target);
12840
12841 return node;
12842 }
12843 case PM_INSTANCE_VARIABLE_READ_NODE: {
12844 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12845 return write_node;
12846 }
12847 case PM_MULTI_TARGET_NODE:
12848 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12849 case PM_SPLAT_NODE: {
12850 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12851
12852 if (splat->expression != NULL) {
12853 splat->expression = parse_write(parser, splat->expression, operator, value);
12854 }
12855
12856 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12857 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12858
12859 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12860 }
12861 case PM_CALL_NODE: {
12862 pm_call_node_t *call = (pm_call_node_t *) target;
12863
12864 // If we have no arguments to the call node and we need this to be a
12865 // target then this is either a method call or a local variable
12866 // write.
12867 if (
12868 (call->message_loc.length > 0) &&
12869 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
12870 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
12871 (call->opening_loc.length == 0) &&
12872 (call->arguments == NULL) &&
12873 (call->block == NULL)
12874 ) {
12875 if (call->receiver == NULL) {
12876 // When we get here, we have a local variable write, because it
12877 // was previously marked as a method call but now we have an =.
12878 // This looks like:
12879 //
12880 // foo = 1
12881 //
12882 // When it was parsed in the prefix position, foo was seen as a
12883 // method call with no receiver and no arguments. Now we have an
12884 // =, so we know it's a local variable write.
12885 pm_location_t message_loc = call->message_loc;
12886
12887 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
12888 pm_parser_local_add_location(parser, &message_loc, 0);
12889
12890 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
12891 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
12892
12893 return target;
12894 }
12895
12896 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
12897 // When we get here, we have a method call, because it was
12898 // previously marked as a method call but now we have an =. This
12899 // looks like:
12900 //
12901 // foo.bar = 1
12902 //
12903 // When it was parsed in the prefix position, foo.bar was seen as a
12904 // method call with no arguments. Now we have an =, so we know it's
12905 // a method call with an argument. In this case we will create the
12906 // arguments node, parse the argument, and add it to the list.
12907 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
12908 call->arguments = arguments;
12909
12910 pm_arguments_node_arguments_append(parser->arena, arguments, value);
12911 PM_NODE_LENGTH_SET_NODE(call, arguments);
12912 call->equal_loc = TOK2LOC(parser, operator);
12913
12914 parse_write_name(parser, &call->name);
12915 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
12916
12917 return UP(call);
12918 }
12919 }
12920
12921 // If there is no call operator and the message is "[]" then this is
12922 // an aref expression, and we can transform it into an aset
12923 // expression.
12924 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12925 if (call->arguments == NULL) {
12926 call->arguments = pm_arguments_node_create(parser);
12927 }
12928
12929 pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
12930 PM_NODE_LENGTH_SET_NODE(target, value);
12931
12932 // Replace the name with "[]=".
12933 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
12934 call->equal_loc = TOK2LOC(parser, operator);
12935
12936 // Ensure that the arguments for []= don't contain keywords
12937 pm_index_arguments_check(parser, call->arguments, call->block);
12938 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
12939
12940 return target;
12941 }
12942
12943 // If there are arguments on the call node, then it can't be a
12944 // method call ending with = or a local variable write, so it must
12945 // be a syntax error. In this case we'll fall through to our default
12946 // handling. We need to free the value that we parsed because there
12947 // is no way for us to attach it to the tree at this point.
12948 //
12949 // Since it is possible for the value to contain an implicit
12950 // parameter somewhere in its subtree, we need to walk it and remove
12951 // any implicit parameters from the list of implicit parameters for
12952 // the current scope.
12953 pm_node_unreference(parser, value);
12954 }
12956 default:
12957 // In this case we have a node that we don't know how to convert into a
12958 // target. We need to treat it as an error. For now, we'll mark it as an
12959 // error and just skip right past it.
12960 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
12961 return target;
12962 }
12963}
12964
12971static pm_node_t *
12972parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
12973 switch (PM_NODE_TYPE(target)) {
12974 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12975 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12976 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12977 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12978 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12979 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12980 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12981 default: break;
12982 }
12983
12984 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
12985 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
12986
12987 return UP(result);
12988}
12989
13000static pm_node_t *
13001parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13002 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13003
13004 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13005 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13006
13007 while (accept1(parser, PM_TOKEN_COMMA)) {
13008 if (accept1(parser, PM_TOKEN_USTAR)) {
13009 // Here we have a splat operator. It can have a name or be
13010 // anonymous. It can be the final target or be in the middle if
13011 // there haven't been any others yet.
13012 if (has_rest) {
13013 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13014 }
13015
13016 pm_token_t star_operator = parser->previous;
13017 pm_node_t *name = NULL;
13018
13019 if (token_begins_expression_p(parser->current.type)) {
13020 name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13021 name = parse_target(parser, name, true, true);
13022 }
13023
13024 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13025 pm_multi_target_node_targets_append(parser, result, splat);
13026 has_rest = true;
13027 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13028 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13029 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13030 target = parse_target(parser, target, true, false);
13031
13032 pm_multi_target_node_targets_append(parser, result, target);
13033 context_pop(parser);
13034 } else if (token_begins_expression_p(parser->current.type)) {
13035 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13036 target = parse_target(parser, target, true, false);
13037
13038 pm_multi_target_node_targets_append(parser, result, target);
13039 } else if (!match1(parser, PM_TOKEN_EOF)) {
13040 // If we get here, then we have a trailing , in a multi target node.
13041 // We'll add an implicit rest node to represent this.
13042 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13043 pm_multi_target_node_targets_append(parser, result, rest);
13044 break;
13045 }
13046 }
13047
13048 return UP(result);
13049}
13050
13055static pm_node_t *
13056parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13057 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13058 accept1(parser, PM_TOKEN_NEWLINE);
13059
13060 // Ensure that we have either an = or a ) after the targets.
13061 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13062 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13063 }
13064
13065 return result;
13066}
13067
/**
 * Parse a list of statements belonging to the given context.
 *
 * Leading newlines and semicolons are skipped first. If the current token
 * already terminates `context` (as reported by context_terminator), NULL is
 * returned and no node is created. Otherwise `context` is pushed, expressions
 * are parsed one at a time and appended to a new statements node until a
 * terminator for the context (or an unrecoverable position) is reached, the
 * context is popped, pm_void_statements_check is run over the result, and the
 * statements node is returned.
 */
13071static pm_statements_node_t *
13072parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13073 // First, skip past any optional terminators that might be at the beginning
13074 // of the statements.
13075 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13076
13077 // If we have a terminator, then we can just return NULL.
13078 if (context_terminator(context, &parser->current)) return NULL;
13079
13080 pm_statements_node_t *statements = pm_statements_node_create(parser);
13081
13082 // At this point we know we have at least one statement, and that it
13083 // immediately follows the current token.
13084 context_push(parser, context);
13085
13086 while (true) {
13087 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13088 pm_statements_node_body_append(parser, statements, node, true);
13089
13090 // If we're recovering from a syntax error, then we need to stop parsing
13091 // the statements now.
13092 if (parser->recovering) {
13093 // If this is the level of context where the recovery has happened,
13094 // then we can mark the parser as done recovering.
13095 if (context_terminator(context, &parser->current)) parser->recovering = false;
13096 break;
13097 }
13098
13099 // If we have a terminator, then we will parse all consecutive
13100 // terminators and then continue parsing the statements list.
13101 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13102 // If we have a terminator, then we will continue parsing the
13103 // statements list.
13104 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13105 if (context_terminator(context, &parser->current)) break;
13106
13107 // Now we can continue parsing the list of statements.
13108 continue;
13109 }
13110
13111 // At this point we have a list of statements that are not terminated by
13112 // a newline or semicolon. At this point we need to check if we're at
13113 // the end of the statements list. If we are, then we should break out
13114 // of the loop.
13115 if (context_terminator(context, &parser->current)) break;
13116
13117 // At this point, we have a syntax error, because the statement was not
13118 // terminated by a newline or semicolon, and we're not at the end of the
13119 // statements list. Ideally we should scan forward to determine if we
13120 // should insert a missing terminator or break out of parsing the
13121 // statements list at this point.
13122 //
13123 // We don't have that yet, so instead we'll do a more naive approach. If
13124 // we were unable to parse an expression, then we will skip past this
13125 // token and continue parsing the statements list. Otherwise we'll add
13126 // an error and continue parsing the statements list.
13127 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
      // Skip the token that could not be parsed so the loop makes progress.
13128 parser_lex(parser);
13129
13130 // If we are at the end of the file, then we need to stop parsing
13131 // the statements entirely at this point. Mark the parser as
13132 // recovering, as we know that EOF closes the top-level context, and
13133 // then break out of the loop.
13134 if (match1(parser, PM_TOKEN_EOF)) {
13135 parser->recovering = true;
13136 break;
13137 }
13138
13139 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13140 if (context_terminator(context, &parser->current)) break;
13141 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13142 // This is an inlined version of accept1 because the error that we
13143 // want to add has varargs. If this happens again, we should
13144 // probably extract a helper function.
13145 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
      // The previous token is collapsed to an empty range of type 0; no
      // token is consumed here, so parsing resumes at the current token.
13146 parser->previous.start = parser->previous.end;
13147 parser->previous.type = 0;
13148 }
13149 }
13150
13151 context_pop(parser);
13152
      // `last_value` is forwarded to pm_void_statements_check below.
      //
      // NOTE(review): as written, no `case` label precedes the assignment in
      // this switch, so `last_value = false` is unreachable and `last_value`
      // is always true. It looks like one or more `case` labels are missing
      // between `switch (context) {` and the assignment -- confirm against
      // the canonical source before relying on this behaviour.
13153 bool last_value = true;
13154 switch (context) {
13157 last_value = false;
13158 break;
13159 default:
13160 break;
13161 }
13162 pm_void_statements_check(parser, statements, last_value);
13163
13164 return statements;
13165}
13166
13171static void
13172pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13173 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13174
13175 if (duplicated != NULL) {
13176 pm_buffer_t buffer = { 0 };
13177 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13178
13179 pm_diagnostic_list_append_format(
13180 &parser->warning_list,
13181 duplicated->location.start,
13182 duplicated->location.length,
13183 PM_WARN_DUPLICATED_HASH_KEY,
13184 (int) pm_buffer_length(&buffer),
13185 pm_buffer_value(&buffer),
13186 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13187 );
13188
13189 pm_buffer_free(&buffer);
13190 }
13191}
13192
13197static void
13198pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13199 pm_node_t *previous;
13200
13201 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13202 pm_diagnostic_list_append_format(
13203 &parser->warning_list,
13204 PM_NODE_START(node),
13205 PM_NODE_LENGTH(node),
13206 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13207 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13208 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13209 );
13210 }
13211}
13212
13216static bool
13217parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13218 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13219 bool contains_keyword_splat = false;
13220
13221 while (true) {
13222 pm_node_t *element;
13223
13224 switch (parser->current.type) {
13225 case PM_TOKEN_USTAR_STAR: {
13226 parser_lex(parser);
13227 pm_token_t operator = parser->previous;
13228 pm_node_t *value = NULL;
13229
13230 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13231 // If we're about to parse a nested hash that is being
13232 // pushed into this hash directly with **, then we want the
13233 // inner hash to share the static literals with the outer
13234 // hash.
13235 parser->current_hash_keys = literals;
13236 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13237 } else if (token_begins_expression_p(parser->current.type)) {
13238 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13239 } else {
13240 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13241 }
13242
13243 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13244 contains_keyword_splat = true;
13245 break;
13246 }
13247 case PM_TOKEN_LABEL: {
13248 pm_token_t label = parser->current;
13249 parser_lex(parser);
13250
13251 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13252 pm_hash_key_static_literals_add(parser, literals, key);
13253
13254 pm_node_t *value = NULL;
13255
13256 if (token_begins_expression_p(parser->current.type)) {
13257 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13258 } else {
13259 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13260 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13261 value = UP(pm_constant_read_node_create(parser, &constant));
13262 } else {
13263 int depth = -1;
13264 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13265
13266 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13267 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13268 } else {
13269 depth = pm_parser_local_depth(parser, &identifier);
13270 }
13271
13272 if (depth == -1) {
13273 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13274 } else {
13275 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13276 }
13277 }
13278
13279 value->location.length++;
13280 value = UP(pm_implicit_node_create(parser, value));
13281 }
13282
13283 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13284 break;
13285 }
13286 default: {
13287 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13288
13289 // Hash keys that are strings are automatically frozen. We will
13290 // mark that here.
13291 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13292 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13293 }
13294
13295 pm_hash_key_static_literals_add(parser, literals, key);
13296
13297 pm_token_t operator = { 0 };
13298 if (!pm_symbol_node_label_p(parser, key)) {
13299 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13300 operator = parser->previous;
13301 }
13302
13303 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13304 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13305 break;
13306 }
13307 }
13308
13309 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13310 pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
13311 } else {
13312 pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
13313 }
13314
13315 // If there's no comma after the element, then we're done.
13316 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13317
13318 // If the next element starts with a label or a **, then we know we have
13319 // another element in the hash, so we'll continue parsing.
13320 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13321
13322 // Otherwise we need to check if the subsequent token begins an expression.
13323 // If it does, then we'll continue parsing.
13324 if (token_begins_expression_p(parser->current.type)) continue;
13325
13326 // Otherwise by default we will exit out of this loop.
13327 break;
13328 }
13329
13330 return contains_keyword_splat;
13331}
13332
13333static inline bool
13334argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13335 if (pm_symbol_node_label_p(parser, argument)) {
13336 return true;
13337 }
13338
13339 switch (PM_NODE_TYPE(argument)) {
13340 case PM_CALL_NODE: {
13341 pm_call_node_t *cast = (pm_call_node_t *) argument;
13342 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13343 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13344 return false;
13345 }
13346 if (cast->block != NULL) {
13347 return false;
13348 }
13349 }
13350 break;
13351 }
13352 default: break;
13353 }
13354 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13355}
13356
13360static inline void
13361parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13362 if (arguments->arguments == NULL) {
13363 arguments->arguments = pm_arguments_node_create(parser);
13364 }
13365
13366 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
13367}
13368
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * @param parser             The parser.
 * @param arguments          Receives the parsed arguments: ordinary arguments
 *                           are appended through parse_arguments_append, the
 *                           first `&` argument is stored in `block`, and
 *                           `has_forwarding` is set when `...` forwarding is
 *                           parsed.
 * @param accepts_forwarding Whether a leading `...` token gets the dedicated
 *                           forwarding/range handling below. When false it is
 *                           parsed like any other argument expression.
 * @param terminator         The token type that closes the list. When it is
 *                           not PM_TOKEN_EOF, a newline is accepted after each
 *                           argument.
 * @param flags              Parse flags; only PM_PARSE_ACCEPTS_DO_BLOCK is
 *                           forwarded to nested expression parses.
 * @param depth              The current nesting depth; nested parses are given
 *                           `depth + 1`.
 *
 * The function returns without parsing anything when the current token is the
 * terminator or EOF, has a set left binding power below
 * PM_BINDING_POWER_RANGE, or terminates the current context.
 */
13372static void
13373parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
13374 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13375
13376 // First we need to check if the next token is one that could be the start
13377 // of an argument. If it's not, then we can just return.
13378 if (
13379 match2(parser, terminator, PM_TOKEN_EOF) ||
13380 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13381 context_terminator(parser->current_context->context, &parser->current)
13382 ) {
13383 return;
13384 }
13385
      // Loop state: what kinds of arguments have been seen so far.
13386 bool parsed_first_argument = false;
13387 bool parsed_bare_hash = false;
13388 bool parsed_block_argument = false;
13389 bool parsed_forwarding_arguments = false;
13390
13391 while (!match1(parser, PM_TOKEN_EOF)) {
13392 if (parsed_forwarding_arguments) {
13393 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13394 }
13395
13396 pm_node_t *argument = NULL;
13397
13398 switch (parser->current.type) {
      // `**value` or `label:` starts a bare keyword hash argument.
13399 case PM_TOKEN_USTAR_STAR:
13400 case PM_TOKEN_LABEL: {
13401 if (parsed_bare_hash) {
13402 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13403 }
13404
13405 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13406 argument = UP(hash);
13407
13408 pm_static_literals_t hash_keys = { 0 };
13409 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13410
13411 parse_arguments_append(parser, arguments, argument);
13412
13413 pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13414 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13415 pm_node_flag_set(UP(arguments->arguments), node_flags);
13416
13417 pm_static_literals_free(&hash_keys);
13418 parsed_bare_hash = true;
13419
13420 break;
13421 }
      // `&expression` or a bare `&`: a block argument.
13422 case PM_TOKEN_UAMPERSAND: {
13423 parser_lex(parser);
13424 pm_token_t operator = parser->previous;
13425 pm_node_t *expression = NULL;
13426
13427 if (token_begins_expression_p(parser->current.type)) {
13428 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13429 } else {
13430 pm_parser_scope_forwarding_block_check(parser, &operator);
13431 }
13432
13433 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
      // Only the first block argument is stored in `block`; any further one
      // is appended to the regular arguments list instead.
13434 if (parsed_block_argument) {
13435 parse_arguments_append(parser, arguments, argument);
13436 } else {
13437 arguments->block = argument;
13438 }
13439
13440 if (match1(parser, PM_TOKEN_COMMA)) {
13441 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13442 }
13443
13444 parsed_block_argument = true;
13445 break;
13446 }
      // `*expression` or a bare `*`: a splat argument.
13447 case PM_TOKEN_USTAR: {
13448 parser_lex(parser);
13449 pm_token_t operator = parser->previous;
13450
13451 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13452 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13453 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13454 if (parsed_bare_hash) {
13455 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13456 }
13457 } else {
13458 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13459
13460 if (parsed_bare_hash) {
13461 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13462 }
13463
13464 argument = UP(pm_splat_node_create(parser, &operator, expression));
13465 }
13466
13467 parse_arguments_append(parser, arguments, argument);
13468 break;
13469 }
13470 case PM_TOKEN_UDOT_DOT_DOT: {
13471 if (accepts_forwarding) {
13472 parser_lex(parser);
13473
13474 if (token_begins_expression_p(parser->current.type)) {
13475 // If the token begins an expression then this ... was
13476 // not actually argument forwarding but was instead a
13477 // range.
13478 pm_token_t operator = parser->previous;
13479 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13480
13481 // If we parse a range, we need to validate that we
13482 // didn't accidentally violate the nonassoc rules of the
13483 // ... operator.
13484 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13485 pm_range_node_t *range = (pm_range_node_t *) right;
13486 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13487 }
13488
13489 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13490 } else {
13491 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13492 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13493 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13494 }
13495
13496 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13497 parse_arguments_append(parser, arguments, argument);
13498 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13499 arguments->has_forwarding = true;
13500 parsed_forwarding_arguments = true;
13501 break;
13502 }
13503 }
13504 }
      // Falls through into the default case. This is reached either when
      // forwarding is not accepted (`argument` is still NULL and is parsed
      // below) or when the `...` turned out to begin a range (`argument` is
      // already set, so only the bare-hash handling and the append below
      // apply to it).
13506 default: {
13507 if (argument == NULL) {
13508 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13509 }
13510
13511 bool contains_keywords = false;
13512 bool contains_keyword_splat = false;
13513
      // The parsed expression may turn out to be the first key of a bare
      // hash (`key => value` or `"label": value`).
13514 if (argument_allowed_for_bare_hash(parser, argument)) {
13515 if (parsed_bare_hash) {
13516 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13517 }
13518
13519 pm_token_t operator = { 0 };
13520 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13521 operator = parser->previous;
13522 }
13523
13524 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13525 contains_keywords = true;
13526
13527 // Create the set of static literals for this hash.
13528 pm_static_literals_t hash_keys = { 0 };
13529 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13530
13531 // Finish parsing the one we are part way through.
13532 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13533 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13534
13535 pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
13536 argument = UP(bare_hash);
13537
13538 // Then parse more if we have a comma
13539 if (accept1(parser, PM_TOKEN_COMMA) && (
13540 token_begins_expression_p(parser->current.type) ||
13541 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13542 )) {
13543 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13544 }
13545
13546 pm_static_literals_free(&hash_keys);
13547 parsed_bare_hash = true;
13548 }
13549
13550 parse_arguments_append(parser, arguments, argument);
13551
13552 pm_node_flags_t node_flags = 0;
13553 if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13554 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13555 pm_node_flag_set(UP(arguments->arguments), node_flags);
13556
13557 break;
13558 }
13559 }
13560
13561 parsed_first_argument = true;
13562
13563 // If parsing the argument failed, we need to stop parsing arguments.
13564 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13565
13566 // If the terminator of these arguments is not EOF, then we have a
13567 // specific token we're looking for. In that case we can accept a
13568 // newline here because it is not functioning as a statement terminator.
13569 bool accepted_newline = false;
13570 if (terminator != PM_TOKEN_EOF) {
13571 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13572 }
13573
13574 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13575 // If we previously were on a comma and we just parsed a bare hash,
13576 // then we want to continue parsing arguments. This is because the
13577 // comma was grabbed up by the hash parser.
13578 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13579 // If there was a comma, then we need to check if we also accepted a
13580 // newline. If we did, then this is a syntax error.
13581 if (accepted_newline) {
13582 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13583 }
13584
13585 // If this is a command call and an argument takes a block,
13586 // there can be no further arguments. For example,
13587 // `foo(bar 1 do end, 2)` should be rejected.
13588 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13589 pm_call_node_t *call = (pm_call_node_t *) argument;
13590 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13591 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13592 break;
13593 }
13594 }
13595 } else {
13596 // If there is no comma at the end of the argument list then we're
13597 // done parsing arguments and can break out of this loop.
13598 break;
13599 }
13600
13601 // If we hit the terminator, then that means we have a trailing comma so
13602 // we can accept that output as well.
13603 if (match1(parser, terminator)) break;
13604 }
13605}
13606
13618parse_required_destructured_parameter(pm_parser_t *parser) {
13619 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13620
13621 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13622 pm_multi_target_node_opening_set(parser, node, &parser->previous);
13623
13624 do {
13625 pm_node_t *param;
13626
13627 // If we get here then we have a trailing comma, which isn't allowed in
13628 // the grammar. In other places, multi targets _do_ allow trailing
13629 // commas, so here we'll assume this is a mistake of the user not
13630 // knowing it's not allowed here.
13631 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13632 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13633 pm_multi_target_node_targets_append(parser, node, param);
13634 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13635 break;
13636 }
13637
13638 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13639 param = UP(parse_required_destructured_parameter(parser));
13640 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13641 pm_token_t star = parser->previous;
13642 pm_node_t *value = NULL;
13643
13644 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13645 pm_token_t name = parser->previous;
13646 value = UP(pm_required_parameter_node_create(parser, &name));
13647 if (pm_parser_parameter_name_check(parser, &name)) {
13648 pm_node_flag_set_repeated_parameter(value);
13649 }
13650 pm_parser_local_add_token(parser, &name, 1);
13651 }
13652
13653 param = UP(pm_splat_node_create(parser, &star, value));
13654 } else {
13655 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13656 pm_token_t name = parser->previous;
13657
13658 param = UP(pm_required_parameter_node_create(parser, &name));
13659 if (pm_parser_parameter_name_check(parser, &name)) {
13660 pm_node_flag_set_repeated_parameter(param);
13661 }
13662 pm_parser_local_add_token(parser, &name, 1);
13663 }
13664
13665 pm_multi_target_node_targets_append(parser, node, param);
13666 } while (accept1(parser, PM_TOKEN_COMMA));
13667
13668 accept1(parser, PM_TOKEN_NEWLINE);
13669 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13670 pm_multi_target_node_closing_set(parser, node, &parser->previous);
13671
13672 return node;
13673}
13674
/**
 * The ordering state used while validating a parameter list.
 *
 * Apart from PM_PARAMETERS_NO_CHANGE, the values are compared numerically by
 * update_parameter_state: the state starts at PM_PARAMETERS_ORDER_NONE and may
 * only move towards smaller values as parameters are parsed.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** No further parameter may follow (block and forwarding parameters). */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** A keyword rest (`**`) parameter. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** A keyword (label) parameter. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** A rest parameter. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** The state following optional parameters (also used for `*`). */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** An optional parameter (introduced by `=`). */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** A named (identifier or destructured) parameter. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The initial state: nothing has been parsed yet. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13690
13694static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13695 [0] = PM_PARAMETERS_NO_CHANGE,
13696 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13697 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13698 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13699 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13700 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13701 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13702 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13703 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13704 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13705 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13706 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13707};
13708
13716static bool
13717update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13718 pm_parameters_order_t state = parameters_ordering[token->type];
13719 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13720
13721 // If we see another ordered argument after a optional argument
13722 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13723 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13724 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13725 return true;
13726 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13727 return true;
13728 }
13729
13730 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13731 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13732 return false;
13733 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13734 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13735 return false;
13736 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13737 // We know what transition we failed on, so we can provide a better error here.
13738 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13739 return false;
13740 }
13741
13742 if (state < *current) *current = state;
13743 return true;
13744}
13745
13746static inline void
13747parse_parameters_handle_trailing_comma(
13748 pm_parser_t *parser,
13749 pm_parameters_node_t *params,
13750 pm_parameters_order_t order,
13751 bool in_block,
13752 bool allows_trailing_comma
13753) {
13754 if (!allows_trailing_comma) {
13755 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13756 return;
13757 }
13758
13759 if (in_block) {
13760 if (order >= PM_PARAMETERS_ORDER_NAMED) {
13761 // foo do |bar,|; end
13762 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13763
13764 if (params->rest == NULL) {
13765 pm_parameters_node_rest_set(params, param);
13766 } else {
13767 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
13768 pm_parameters_node_posts_append(parser->arena, params, UP(param));
13769 }
13770 } else {
13771 // foo do |*bar,|; end
13772 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13773 }
13774 } else {
13775 // https://bugs.ruby-lang.org/issues/19107
13776 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
13777 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
13778 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13779 }
13780 }
13781}
13782
13786static pm_parameters_node_t *
13787parse_parameters(
13788 pm_parser_t *parser,
13789 pm_binding_power_t binding_power,
13790 bool uses_parentheses,
13791 bool allows_trailing_comma,
13792 bool allows_forwarding_parameters,
13793 bool accepts_blocks_in_defaults,
13794 bool in_block,
13795 pm_diagnostic_id_t diag_id_forwarding,
13796 uint16_t depth
13797) {
13798 pm_do_loop_stack_push(parser, false);
13799
13800 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13801 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13802
13803 while (true) {
13804 bool parsing = true;
13805
13806 switch (parser->current.type) {
13807 case PM_TOKEN_PARENTHESIS_LEFT: {
13808 update_parameter_state(parser, &parser->current, &order);
13809 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13810
13811 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13812 pm_parameters_node_requireds_append(parser->arena, params, param);
13813 } else {
13814 pm_parameters_node_posts_append(parser->arena, params, param);
13815 }
13816 break;
13817 }
13818 case PM_TOKEN_UAMPERSAND:
13819 case PM_TOKEN_AMPERSAND: {
13820 update_parameter_state(parser, &parser->current, &order);
13821 parser_lex(parser);
13822
13823 pm_token_t operator = parser->previous;
13824 pm_node_t *param;
13825
13826 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
13827 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
13828 } else {
13829 pm_token_t name = {0};
13830
13831 bool repeated = false;
13832 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13833 name = parser->previous;
13834 repeated = pm_parser_parameter_name_check(parser, &name);
13835 pm_parser_local_add_token(parser, &name, 1);
13836 } else {
13837 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13838 }
13839
13840 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
13841 if (repeated) {
13842 pm_node_flag_set_repeated_parameter(param);
13843 }
13844 }
13845
13846 if (params->block == NULL) {
13847 pm_parameters_node_block_set(params, param);
13848 } else {
13849 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
13850 pm_parameters_node_posts_append(parser->arena, params, param);
13851 }
13852
13853 break;
13854 }
13855 case PM_TOKEN_UDOT_DOT_DOT: {
13856 if (!allows_forwarding_parameters) {
13857 pm_parser_err_current(parser, diag_id_forwarding);
13858 }
13859
13860 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13861 parser_lex(parser);
13862
13863 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13864 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13865
13866 if (params->keyword_rest != NULL) {
13867 // If we already have a keyword rest parameter, then we replace it with the
13868 // forwarding parameter and move the keyword rest parameter to the posts list.
13869 pm_node_t *keyword_rest = params->keyword_rest;
13870 pm_parameters_node_posts_append(parser->arena, params, keyword_rest);
13871 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13872 params->keyword_rest = NULL;
13873 }
13874
13875 pm_parameters_node_keyword_rest_set(params, UP(param));
13876 break;
13877 }
13878 case PM_TOKEN_CLASS_VARIABLE:
13879 case PM_TOKEN_IDENTIFIER:
13880 case PM_TOKEN_CONSTANT:
13881 case PM_TOKEN_INSTANCE_VARIABLE:
13882 case PM_TOKEN_GLOBAL_VARIABLE:
13883 case PM_TOKEN_METHOD_NAME: {
13884 parser_lex(parser);
13885 switch (parser->previous.type) {
13886 case PM_TOKEN_CONSTANT:
13887 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13888 break;
13889 case PM_TOKEN_INSTANCE_VARIABLE:
13890 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13891 break;
13892 case PM_TOKEN_GLOBAL_VARIABLE:
13893 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13894 break;
13895 case PM_TOKEN_CLASS_VARIABLE:
13896 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13897 break;
13898 case PM_TOKEN_METHOD_NAME:
13899 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13900 break;
13901 default: break;
13902 }
13903
13904 if (parser->current.type == PM_TOKEN_EQUAL) {
13905 update_parameter_state(parser, &parser->current, &order);
13906 } else {
13907 update_parameter_state(parser, &parser->previous, &order);
13908 }
13909
13910 pm_token_t name = parser->previous;
13911 bool repeated = pm_parser_parameter_name_check(parser, &name);
13912 pm_parser_local_add_token(parser, &name, 1);
13913
13914 if (match1(parser, PM_TOKEN_EQUAL)) {
13915 pm_token_t operator = parser->current;
13916 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13917 parser_lex(parser);
13918
13919 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
13920 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
13921
13922 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
13923 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
13924 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
13925
13926 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
13927
13928 if (repeated) {
13929 pm_node_flag_set_repeated_parameter(UP(param));
13930 }
13931 pm_parameters_node_optionals_append(parser->arena, params, param);
13932
13933 // If the value of the parameter increased the number of
13934 // reads of that parameter, then we need to warn that we
13935 // have a circular definition.
13936 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
13937 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
13938 }
13939
13940 context_pop(parser);
13941
13942 // If parsing the value of the parameter resulted in error recovery,
13943 // then we can put a missing node in its place and stop parsing the
13944 // parameters entirely now.
13945 if (parser->recovering) {
13946 parsing = false;
13947 break;
13948 }
13949 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13950 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13951 if (repeated) {
13952 pm_node_flag_set_repeated_parameter(UP(param));
13953 }
13954 pm_parameters_node_requireds_append(parser->arena, params, UP(param));
13955 } else {
13956 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
13957 if (repeated) {
13958 pm_node_flag_set_repeated_parameter(UP(param));
13959 }
13960 pm_parameters_node_posts_append(parser->arena, params, UP(param));
13961 }
13962
13963 break;
13964 }
13965 case PM_TOKEN_LABEL: {
13966 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
13967 update_parameter_state(parser, &parser->current, &order);
13968
13969 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
13970 parser_lex(parser);
13971
13972 pm_token_t name = parser->previous;
13973 pm_token_t local = name;
13974 local.end -= 1;
13975
13976 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
13977 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13978 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
13979 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
13980 }
13981
13982 bool repeated = pm_parser_parameter_name_check(parser, &local);
13983 pm_parser_local_add_token(parser, &local, 1);
13984
13985 switch (parser->current.type) {
13986 case PM_TOKEN_COMMA:
13987 case PM_TOKEN_PARENTHESIS_RIGHT:
13988 case PM_TOKEN_PIPE: {
13989 context_pop(parser);
13990
13991 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
13992 if (repeated) {
13993 pm_node_flag_set_repeated_parameter(param);
13994 }
13995
13996 pm_parameters_node_keywords_append(parser->arena, params, param);
13997 break;
13998 }
13999 case PM_TOKEN_SEMICOLON:
14000 case PM_TOKEN_NEWLINE: {
14001 context_pop(parser);
14002
14003 if (uses_parentheses) {
14004 parsing = false;
14005 break;
14006 }
14007
14008 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14009 if (repeated) {
14010 pm_node_flag_set_repeated_parameter(param);
14011 }
14012
14013 pm_parameters_node_keywords_append(parser->arena, params, param);
14014 break;
14015 }
14016 default: {
14017 pm_node_t *param;
14018
14019 if (token_begins_expression_p(parser->current.type)) {
14020 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14021 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14022
14023 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14024 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14025 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14026
14027 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14028 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14029 }
14030
14031 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14032 }
14033 else {
14034 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14035 }
14036
14037 if (repeated) {
14038 pm_node_flag_set_repeated_parameter(param);
14039 }
14040
14041 context_pop(parser);
14042 pm_parameters_node_keywords_append(parser->arena, params, param);
14043
14044 // If parsing the value of the parameter resulted in error recovery,
14045 // then we can put a missing node in its place and stop parsing the
14046 // parameters entirely now.
14047 if (parser->recovering) {
14048 parsing = false;
14049 break;
14050 }
14051 }
14052 }
14053
14054 parser->in_keyword_arg = false;
14055 break;
14056 }
14057 case PM_TOKEN_USTAR:
14058 case PM_TOKEN_STAR: {
14059 update_parameter_state(parser, &parser->current, &order);
14060 parser_lex(parser);
14061
14062 pm_token_t operator = parser->previous;
14063 pm_token_t name = { 0 };
14064 bool repeated = false;
14065
14066 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14067 name = parser->previous;
14068 repeated = pm_parser_parameter_name_check(parser, &name);
14069 pm_parser_local_add_token(parser, &name, 1);
14070 } else {
14071 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14072 }
14073
14074 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14075 if (repeated) {
14076 pm_node_flag_set_repeated_parameter(param);
14077 }
14078
14079 if (params->rest == NULL) {
14080 pm_parameters_node_rest_set(params, param);
14081 } else {
14082 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14083 pm_parameters_node_posts_append(parser->arena, params, param);
14084 }
14085
14086 break;
14087 }
14088 case PM_TOKEN_STAR_STAR:
14089 case PM_TOKEN_USTAR_STAR: {
14090 pm_parameters_order_t previous_order = order;
14091 update_parameter_state(parser, &parser->current, &order);
14092 parser_lex(parser);
14093
14094 pm_token_t operator = parser->previous;
14095 pm_node_t *param;
14096
14097 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14098 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14099 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14100 }
14101
14102 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14103 } else {
14104 pm_token_t name = { 0 };
14105
14106 bool repeated = false;
14107 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14108 name = parser->previous;
14109 repeated = pm_parser_parameter_name_check(parser, &name);
14110 pm_parser_local_add_token(parser, &name, 1);
14111 } else {
14112 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14113 }
14114
14115 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14116 if (repeated) {
14117 pm_node_flag_set_repeated_parameter(param);
14118 }
14119 }
14120
14121 if (params->keyword_rest == NULL) {
14122 pm_parameters_node_keyword_rest_set(params, param);
14123 } else {
14124 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14125 pm_parameters_node_posts_append(parser->arena, params, param);
14126 }
14127
14128 break;
14129 }
14130 default:
14131 if (parser->previous.type == PM_TOKEN_COMMA) {
14132 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14133 }
14134
14135 parsing = false;
14136 break;
14137 }
14138
14139 // If we hit some kind of issue while parsing the parameter, this would
14140 // have been set to false. In that case, we need to break out of the
14141 // loop.
14142 if (!parsing) break;
14143
14144 bool accepted_newline = false;
14145 if (uses_parentheses) {
14146 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14147 }
14148
14149 if (accept1(parser, PM_TOKEN_COMMA)) {
14150 // If there was a comma, but we also accepted a newline, then this
14151 // is a syntax error.
14152 if (accepted_newline) {
14153 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14154 }
14155 } else {
14156 // If there was no comma, then we're done parsing parameters.
14157 break;
14158 }
14159 }
14160
14161 pm_do_loop_stack_pop(parser);
14162
14163 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14164 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14165 return NULL;
14166 }
14167
14168 return params;
14169}
14170
14175static size_t
14176token_newline_index(const pm_parser_t *parser) {
14177 if (parser->heredoc_end == NULL) {
14178 // This is the common case. In this case we can look at the previously
14179 // recorded newline in the newline list and subtract from the current
14180 // offset.
14181 return parser->line_offsets.size - 1;
14182 } else {
14183 // This is unlikely. This is the case that we have already parsed the
14184 // start of a heredoc, so we cannot rely on looking at the previous
14185 // offset of the newline list, and instead must go through the whole
14186 // process of a binary search for the line number.
14187 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14188 }
14189}
14190
14195static int64_t
14196token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14197 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14198 const uint8_t *end = token->start;
14199
14200 // Skip over the BOM if it is present.
14201 if (
14202 newline_index == 0 &&
14203 parser->start[0] == 0xef &&
14204 parser->start[1] == 0xbb &&
14205 parser->start[2] == 0xbf
14206 ) cursor += 3;
14207
14208 int64_t column = 0;
14209 for (; cursor < end; cursor++) {
14210 switch (*cursor) {
14211 case '\t':
14212 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14213 break;
14214 case ' ':
14215 column++;
14216 break;
14217 default:
14218 column++;
14219 if (break_on_non_space) return -1;
14220 break;
14221 }
14222 }
14223
14224 return column;
14225}
14226
14231static void
14232parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14233 // If these warnings are disabled (unlikely), then we can just return.
14234 if (!parser->warn_mismatched_indentation) return;
14235
14236 // If the tokens are on the same line, we do not warn.
14237 size_t closing_newline_index = token_newline_index(parser);
14238 if (opening_newline_index == closing_newline_index) return;
14239
14240 // If the opening token has anything other than spaces or tabs before it,
14241 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14242 // and the `if` immediately follows an `else` keyword.
14243 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14244 if (!if_after_else && (opening_column == -1)) return;
14245
14246 // Get a reference to the closing token off the current parser. This assumes
14247 // that the caller has placed this in the correct position.
14248 pm_token_t *closing_token = &parser->current;
14249
14250 // If the tokens are at the same indentation, we do not warn.
14251 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14252 if ((closing_column == -1) || (opening_column == closing_column)) return;
14253
14254 // If the closing column is greater than the opening column and we are
14255 // allowing indentation, then we do not warn.
14256 if (allow_indent && (closing_column > opening_column)) return;
14257
14258 // Otherwise, add a warning.
14259 PM_PARSER_WARN_FORMAT(
14260 parser,
14261 PM_TOKEN_START(parser, closing_token),
14262 PM_TOKEN_LENGTH(closing_token),
14263 PM_WARN_INDENTATION_MISMATCH,
14264 (int) (closing_token->end - closing_token->start),
14265 (const char *) closing_token->start,
14266 (int) (opening_token->end - opening_token->start),
14267 (const char *) opening_token->start,
14268 ((int32_t) opening_newline_index) + parser->start_line
14269 );
14270}
14271
/**
 * Identifies the kind of construct whose rescue/else/ensure clauses are being
 * parsed. parse_rescues uses this to pick the matching PM_CONTEXT_* value for
 * the statements inside each clause. Values start at 1.
 */
typedef enum {
    /** Selects the PM_CONTEXT_BEGIN_* contexts. */
    PM_RESCUES_BEGIN = 1,

    /** Selects the PM_CONTEXT_BLOCK_* contexts. */
    PM_RESCUES_BLOCK = 2,

    /** Selects the PM_CONTEXT_CLASS_* contexts. */
    PM_RESCUES_CLASS = 3,

    /** Selects the PM_CONTEXT_DEF_* contexts. */
    PM_RESCUES_DEF = 4,

    /** Selects the PM_CONTEXT_LAMBDA_* contexts. */
    PM_RESCUES_LAMBDA = 5,

    /** Selects the PM_CONTEXT_MODULE_* contexts. */
    PM_RESCUES_MODULE = 6,

    /** Selects the PM_CONTEXT_SCLASS_* contexts. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14281
14286static inline void
14287parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14288 pm_rescue_node_t *current = NULL;
14289
14290 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14291 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14292 parser_lex(parser);
14293
14294 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14295
14296 switch (parser->current.type) {
14297 case PM_TOKEN_EQUAL_GREATER: {
14298 // Here we have an immediate => after the rescue keyword, in which case
14299 // we're going to have an empty list of exceptions to rescue (which
14300 // implies StandardError).
14301 parser_lex(parser);
14302 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14303
14304 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14305 reference = parse_target(parser, reference, false, false);
14306
14307 pm_rescue_node_reference_set(rescue, reference);
14308 break;
14309 }
14310 case PM_TOKEN_NEWLINE:
14311 case PM_TOKEN_SEMICOLON:
14312 case PM_TOKEN_KEYWORD_THEN:
14313 // Here we have a terminator for the rescue keyword, in which
14314 // case we're going to just continue on.
14315 break;
14316 default: {
14317 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14318 // Here we have something that could be an exception expression, so
14319 // we'll attempt to parse it here and any others delimited by commas.
14320
14321 do {
14322 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14323 pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
14324
14325 // If we hit a newline, then this is the end of the rescue expression. We
14326 // can continue on to parse the statements.
14327 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14328
14329 // If we hit a `=>` then we're going to parse the exception variable. Once
14330 // we've done that, we'll break out of the loop and parse the statements.
14331 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14332 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14333
14334 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14335 reference = parse_target(parser, reference, false, false);
14336
14337 pm_rescue_node_reference_set(rescue, reference);
14338 break;
14339 }
14340 } while (accept1(parser, PM_TOKEN_COMMA));
14341 }
14342 }
14343 }
14344
14345 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14346 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14347 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14348 }
14349 } else {
14350 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14351 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14352 }
14353
14354 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14355 pm_accepts_block_stack_push(parser, true);
14356 pm_context_t context;
14357
14358 switch (type) {
14359 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14360 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14361 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14362 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14363 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14364 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14365 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14366 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14367 }
14368
14369 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14370 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14371
14372 pm_accepts_block_stack_pop(parser);
14373 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14374 }
14375
14376 if (current == NULL) {
14377 pm_begin_node_rescue_clause_set(parent_node, rescue);
14378 } else {
14379 pm_rescue_node_subsequent_set(current, rescue);
14380 }
14381
14382 current = rescue;
14383 }
14384
14385 // The end node locations on rescue nodes will not be set correctly
14386 // since we won't know the end until we've found all subsequent
14387 // clauses. This sets the end location on all rescues once we know it.
14388 if (current != NULL) {
14389 pm_rescue_node_t *clause = parent_node->rescue_clause;
14390
14391 while (clause != NULL) {
14392 PM_NODE_LENGTH_SET_NODE(clause, current);
14393 clause = clause->subsequent;
14394 }
14395 }
14396
14397 pm_token_t else_keyword;
14398 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14399 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14400 opening_newline_index = token_newline_index(parser);
14401
14402 else_keyword = parser->current;
14403 opening = &else_keyword;
14404
14405 parser_lex(parser);
14406 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14407
14408 pm_statements_node_t *else_statements = NULL;
14409 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14410 pm_accepts_block_stack_push(parser, true);
14411 pm_context_t context;
14412
14413 switch (type) {
14414 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14415 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14416 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14417 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14418 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14419 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14420 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14421 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14422 }
14423
14424 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14425 pm_accepts_block_stack_pop(parser);
14426
14427 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14428 }
14429
14430 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14431 pm_begin_node_else_clause_set(parent_node, else_clause);
14432
14433 // If we don't have a `current` rescue node, then this is a dangling
14434 // else, and it's an error.
14435 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14436 }
14437
14438 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14439 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14440 pm_token_t ensure_keyword = parser->current;
14441
14442 parser_lex(parser);
14443 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14444
14445 pm_statements_node_t *ensure_statements = NULL;
14446 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14447 pm_accepts_block_stack_push(parser, true);
14448 pm_context_t context;
14449
14450 switch (type) {
14451 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14452 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14453 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14454 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14455 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14456 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14457 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14458 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14459 }
14460
14461 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14462 pm_accepts_block_stack_pop(parser);
14463
14464 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14465 }
14466
14467 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14468 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14469 }
14470
14471 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14472 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14473 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14474 } else {
14475 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14476 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14477 }
14478}
14479
14484static pm_begin_node_t *
14485parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14486 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14487 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14488
14489 node->base.location.start = U32(start - parser->start);
14490 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14491
14492 return node;
14493}
14494
14499parse_block_parameters(
14500 pm_parser_t *parser,
14501 bool allows_trailing_comma,
14502 const pm_token_t *opening,
14503 bool is_lambda_literal,
14504 bool accepts_blocks_in_defaults,
14505 uint16_t depth
14506) {
14507 pm_parameters_node_t *parameters = NULL;
14508 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14509 if (!is_lambda_literal) {
14510 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14511 }
14512 parameters = parse_parameters(
14513 parser,
14514 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14515 false,
14516 allows_trailing_comma,
14517 false,
14518 accepts_blocks_in_defaults,
14519 true,
14520 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14521 (uint16_t) (depth + 1)
14522 );
14523 if (!is_lambda_literal) {
14524 context_pop(parser);
14525 }
14526 }
14527
14528 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14529 if (opening != NULL) {
14530 accept1(parser, PM_TOKEN_NEWLINE);
14531
14532 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14533 do {
14534 switch (parser->current.type) {
14535 case PM_TOKEN_CONSTANT:
14536 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14537 parser_lex(parser);
14538 break;
14539 case PM_TOKEN_INSTANCE_VARIABLE:
14540 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14541 parser_lex(parser);
14542 break;
14543 case PM_TOKEN_GLOBAL_VARIABLE:
14544 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14545 parser_lex(parser);
14546 break;
14547 case PM_TOKEN_CLASS_VARIABLE:
14548 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14549 parser_lex(parser);
14550 break;
14551 default:
14552 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14553 break;
14554 }
14555
14556 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14557 pm_parser_local_add_token(parser, &parser->previous, 1);
14558
14559 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14560 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14561
14562 pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
14563 } while (accept1(parser, PM_TOKEN_COMMA));
14564 }
14565 }
14566
14567 return block_parameters;
14568}
14569
14574static bool
14575outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14576 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14577 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14578 }
14579
14580 return false;
14581}
14582
/**
 * The names of the numbered parameters. The entry at index i holds the name
 * of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1", "_2", "_3",
    "_4", "_5", "_6",
    "_7", "_8", "_9"
};
14591
14597static pm_node_t *
14598parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14599 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14600
14601 // If we have ordinary parameters, then we will return them as the set of
14602 // parameters.
14603 if (parameters != NULL) {
14604 // If we also have implicit parameters, then this is an error.
14605 if (implicit_parameters->size > 0) {
14606 pm_node_t *node = implicit_parameters->nodes[0];
14607
14608 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14609 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14610 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14611 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14612 } else {
14613 assert(false && "unreachable");
14614 }
14615 }
14616
14617 return parameters;
14618 }
14619
14620 // If we don't have any implicit parameters, then the set of parameters is
14621 // NULL.
14622 if (implicit_parameters->size == 0) {
14623 return NULL;
14624 }
14625
14626 // If we don't have ordinary parameters, then we now must validate our set
14627 // of implicit parameters. We can only have numbered parameters or it, but
14628 // they cannot be mixed.
14629 uint8_t numbered_parameter = 0;
14630 bool it_parameter = false;
14631
14632 for (size_t index = 0; index < implicit_parameters->size; index++) {
14633 pm_node_t *node = implicit_parameters->nodes[index];
14634
14635 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14636 if (it_parameter) {
14637 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14638 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14639 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14640 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14641 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14642 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
14643 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
14644 } else {
14645 assert(false && "unreachable");
14646 }
14647 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14648 if (numbered_parameter > 0) {
14649 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14650 } else {
14651 it_parameter = true;
14652 }
14653 }
14654 }
14655
14656 if (numbered_parameter > 0) {
14657 // Go through the parent scopes and mark them as being disallowed from
14658 // using numbered parameters because this inner scope is using them.
14659 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14660 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14661 }
14662 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
14663 }
14664
14665 if (it_parameter) {
14666 return UP(pm_it_parameters_node_create(parser, opening, closing));
14667 }
14668
14669 return NULL;
14670}
14671
14675static pm_block_node_t *
14676parse_block(pm_parser_t *parser, uint16_t depth) {
14677 pm_token_t opening = parser->previous;
14678 accept1(parser, PM_TOKEN_NEWLINE);
14679
14680 pm_accepts_block_stack_push(parser, true);
14681 pm_parser_scope_push(parser, false);
14682
14683 pm_block_parameters_node_t *block_parameters = NULL;
14684
14685 if (accept1(parser, PM_TOKEN_PIPE)) {
14686 pm_token_t block_parameters_opening = parser->previous;
14687 if (match1(parser, PM_TOKEN_PIPE)) {
14688 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14689 parser->command_start = true;
14690 parser_lex(parser);
14691 } else {
14692 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14693 accept1(parser, PM_TOKEN_NEWLINE);
14694 parser->command_start = true;
14695 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14696 }
14697
14698 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
14699 }
14700
14701 accept1(parser, PM_TOKEN_NEWLINE);
14702 pm_node_t *statements = NULL;
14703
14704 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14705 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14706 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14707 }
14708
14709 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
14710 } else {
14711 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14712 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14713 pm_accepts_block_stack_push(parser, true);
14714 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14715 pm_accepts_block_stack_pop(parser);
14716 }
14717
14718 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14719 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14720 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14721 }
14722 }
14723
14724 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
14725 }
14726
14727 pm_constant_id_list_t locals;
14728 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14729 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14730
14731 pm_parser_scope_pop(parser);
14732 pm_accepts_block_stack_pop(parser);
14733
14734 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14735}
14736
14742static bool
14743parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
14744 bool found = false;
14745 bool parsed_command_args = false;
14746
14747 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14748 found |= true;
14749 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
14750
14751 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14752 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14753 } else {
14754 pm_accepts_block_stack_push(parser, true);
14755 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
14756
14757 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14758 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14759 parser->previous.start = parser->previous.end;
14760 parser->previous.type = 0;
14761 }
14762
14763 pm_accepts_block_stack_pop(parser);
14764 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14765 }
14766 } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14767 found |= true;
14768 parsed_command_args = true;
14769 pm_accepts_block_stack_push(parser, false);
14770
14771 // If we get here, then the subsequent token cannot be used as an infix
14772 // operator. In this case we assume the subsequent token is part of an
14773 // argument to this method call.
14774 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
14775
14776 // If we have done with the arguments and still not consumed the comma,
14777 // then we have a trailing comma where we need to check whether it is
14778 // allowed or not.
14779 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14780 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14781 }
14782
14783 pm_accepts_block_stack_pop(parser);
14784 }
14785
14786 // If we're at the end of the arguments, we can now check if there is a block
14787 // node that starts with a {. If there is, then we can parse it and add it to
14788 // the arguments.
14789 if (accepts_block) {
14790 pm_block_node_t *block = NULL;
14791
14792 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14793 found |= true;
14794 block = parse_block(parser, (uint16_t) (depth + 1));
14795 pm_arguments_validate_block(parser, arguments, block);
14796 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14797 found |= true;
14798 block = parse_block(parser, (uint16_t) (depth + 1));
14799 } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
14800 found |= true;
14801 block = parse_block(parser, (uint16_t) (depth + 1));
14802 }
14803
14804 if (block != NULL) {
14805 if (arguments->block == NULL && !arguments->has_forwarding) {
14806 arguments->block = UP(block);
14807 } else {
14808 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14809
14810 if (arguments->block != NULL) {
14811 if (arguments->arguments == NULL) {
14812 arguments->arguments = pm_arguments_node_create(parser);
14813 }
14814 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
14815 }
14816 arguments->block = UP(block);
14817 }
14818 }
14819 }
14820
14821 return found;
14822}
14823
14828static void
14829parse_return(pm_parser_t *parser, pm_node_t *node) {
14830 bool in_sclass = false;
14831 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14832 switch (context_node->context) {
14836 case PM_CONTEXT_BEGIN:
14837 case PM_CONTEXT_CASE_IN:
14840 case PM_CONTEXT_DEFINED:
14841 case PM_CONTEXT_ELSE:
14842 case PM_CONTEXT_ELSIF:
14843 case PM_CONTEXT_EMBEXPR:
14845 case PM_CONTEXT_FOR:
14846 case PM_CONTEXT_IF:
14848 case PM_CONTEXT_MAIN:
14850 case PM_CONTEXT_PARENS:
14851 case PM_CONTEXT_POSTEXE:
14853 case PM_CONTEXT_PREEXE:
14855 case PM_CONTEXT_TERNARY:
14856 case PM_CONTEXT_UNLESS:
14857 case PM_CONTEXT_UNTIL:
14858 case PM_CONTEXT_WHILE:
14859 // Keep iterating up the lists of contexts, because returns can
14860 // see through these.
14861 continue;
14865 case PM_CONTEXT_SCLASS:
14866 in_sclass = true;
14867 continue;
14871 case PM_CONTEXT_CLASS:
14875 case PM_CONTEXT_MODULE:
14876 // These contexts are invalid for a return.
14877 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14878 return;
14889 case PM_CONTEXT_DEF:
14895 // These contexts are valid for a return, and we should not
14896 // continue to loop.
14897 return;
14898 case PM_CONTEXT_NONE:
14899 // This case should never happen.
14900 assert(false && "unreachable");
14901 break;
14902 }
14903 }
14904 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14905 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14906 }
14907}
14908
14913static void
14914parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
14915 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14916 switch (context_node->context) {
14923 case PM_CONTEXT_DEFINED:
14924 case PM_CONTEXT_FOR:
14931 case PM_CONTEXT_POSTEXE:
14932 case PM_CONTEXT_UNTIL:
14933 case PM_CONTEXT_WHILE:
14934 // These are the good cases. We're allowed to have a block exit
14935 // in these contexts.
14936 return;
14937 case PM_CONTEXT_DEF:
14942 case PM_CONTEXT_MAIN:
14943 case PM_CONTEXT_PREEXE:
14944 case PM_CONTEXT_SCLASS:
14948 // These are the bad cases. We're not allowed to have a block
14949 // exit in these contexts.
14950 //
14951 // If we get here, then we're about to mark this block exit
14952 // as invalid. However, it could later _become_ valid if we
14953 // find a trailing while/until on the expression. In this
14954 // case instead of adding the error here, we'll add the
14955 // block exit to the list of exits for the expression, and
14956 // the node parsing will handle validating it instead.
14957 assert(parser->current_block_exits != NULL);
14958 pm_node_list_append(parser->arena, parser->current_block_exits, node);
14959 return;
14963 case PM_CONTEXT_BEGIN:
14964 case PM_CONTEXT_CASE_IN:
14969 case PM_CONTEXT_CLASS:
14971 case PM_CONTEXT_ELSE:
14972 case PM_CONTEXT_ELSIF:
14973 case PM_CONTEXT_EMBEXPR:
14975 case PM_CONTEXT_IF:
14979 case PM_CONTEXT_MODULE:
14981 case PM_CONTEXT_PARENS:
14984 case PM_CONTEXT_TERNARY:
14985 case PM_CONTEXT_UNLESS:
14986 // In these contexts we should continue walking up the list of
14987 // contexts.
14988 break;
14989 case PM_CONTEXT_NONE:
14990 // This case should never happen.
14991 assert(false && "unreachable");
14992 break;
14993 }
14994 }
14995}
14996
15001static pm_node_list_t *
15002push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15003 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15004 parser->current_block_exits = current_block_exits;
15005 return previous_block_exits;
15006}
15007
15013static void
15014flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15015 pm_node_t *block_exit;
15016 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15017 const char *type;
15018
15019 switch (PM_NODE_TYPE(block_exit)) {
15020 case PM_BREAK_NODE: type = "break"; break;
15021 case PM_NEXT_NODE: type = "next"; break;
15022 case PM_REDO_NODE: type = "redo"; break;
15023 default: assert(false && "unreachable"); type = ""; break;
15024 }
15025
15026 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15027 }
15028
15029 parser->current_block_exits = previous_block_exits;
15030}
15031
15036static void
15037pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15038 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15039 // If we matched a trailing while/until, then all of the block exits in
15040 // the contained list are valid. In this case we do not need to do
15041 // anything.
15042 parser->current_block_exits = previous_block_exits;
15043 } else if (previous_block_exits != NULL) {
15044 // If we did not matching a trailing while/until, then all of the block
15045 // exits contained in the list are invalid for this specific context.
15046 // However, they could still become valid in a higher level context if
15047 // there is another list above this one. In this case we'll push all of
15048 // the block exits up to the previous list.
15049 pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
15050 parser->current_block_exits = previous_block_exits;
15051 } else {
15052 // If we did not match a trailing while/until and this was the last
15053 // chance to do so, then all of the block exits in the list are invalid
15054 // and we need to add an error for each of them.
15055 flush_block_exits(parser, previous_block_exits);
15056 }
15057}
15058
15059static inline pm_node_t *
15060parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15061 context_push(parser, PM_CONTEXT_PREDICATE);
15062 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15063 pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
15064
15065 // Predicates are closed by a term, a "then", or a term and then a "then".
15066 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15067
15068 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15069 predicate_closed = true;
15070 *then_keyword = parser->previous;
15071 }
15072
15073 if (!predicate_closed) {
15074 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15075 }
15076
15077 context_pop(parser);
15078 return predicate;
15079}
15080
15081static inline pm_node_t *
15082parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15083 pm_node_list_t current_block_exits = { 0 };
15084 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15085
15086 pm_token_t keyword = parser->previous;
15087 pm_token_t then_keyword = { 0 };
15088
15089 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15090 pm_statements_node_t *statements = NULL;
15091
15092 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15093 pm_accepts_block_stack_push(parser, true);
15094 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15095 pm_accepts_block_stack_pop(parser);
15096 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15097 }
15098
15099 pm_node_t *parent = NULL;
15100
15101 switch (context) {
15102 case PM_CONTEXT_IF:
15103 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15104 break;
15105 case PM_CONTEXT_UNLESS:
15106 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15107 break;
15108 default:
15109 assert(false && "unreachable");
15110 break;
15111 }
15112
15113 pm_node_t *current = parent;
15114
15115 // Parse any number of elsif clauses. This will form a linked list of if
15116 // nodes pointing to each other from the top.
15117 if (context == PM_CONTEXT_IF) {
15118 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15119 if (parser_end_of_line_p(parser)) {
15120 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15121 }
15122
15123 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15124 pm_token_t elsif_keyword = parser->current;
15125 parser_lex(parser);
15126
15127 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15128 pm_accepts_block_stack_push(parser, true);
15129
15130 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15131 pm_accepts_block_stack_pop(parser);
15132 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15133
15134 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15135 ((pm_if_node_t *) current)->subsequent = elsif;
15136 current = elsif;
15137 }
15138 }
15139
15140 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15141 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15142 opening_newline_index = token_newline_index(parser);
15143
15144 parser_lex(parser);
15145 pm_token_t else_keyword = parser->previous;
15146
15147 pm_accepts_block_stack_push(parser, true);
15148 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15149 pm_accepts_block_stack_pop(parser);
15150
15151 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15152 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15153 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15154
15155 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15156
15157 switch (context) {
15158 case PM_CONTEXT_IF:
15159 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15160 break;
15161 case PM_CONTEXT_UNLESS:
15162 ((pm_unless_node_t *) parent)->else_clause = else_node;
15163 break;
15164 default:
15165 assert(false && "unreachable");
15166 break;
15167 }
15168 } else {
15169 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15170 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15171 }
15172
15173 // Set the appropriate end location for all of the nodes in the subtree.
15174 switch (context) {
15175 case PM_CONTEXT_IF: {
15176 pm_node_t *current = parent;
15177 bool recursing = true;
15178
15179 while (recursing) {
15180 switch (PM_NODE_TYPE(current)) {
15181 case PM_IF_NODE:
15182 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15183 current = ((pm_if_node_t *) current)->subsequent;
15184 recursing = current != NULL;
15185 break;
15186 case PM_ELSE_NODE:
15187 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15188 recursing = false;
15189 break;
15190 default: {
15191 recursing = false;
15192 break;
15193 }
15194 }
15195 }
15196 break;
15197 }
15198 case PM_CONTEXT_UNLESS:
15199 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15200 break;
15201 default:
15202 assert(false && "unreachable");
15203 break;
15204 }
15205
15206 pop_block_exits(parser, previous_block_exits);
15207 return parent;
15208}
15209
/**
 * The case labels for all of the keyword tokens listed below. The leading
 * `case` and the trailing `:` are deliberately left off so that the macro is
 * written as `case PM_CASE_KEYWORD:` inside a switch over a token type.
 */
#define PM_CASE_KEYWORD \
         PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_BLOCK: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15225
/**
 * The case labels for all of the operator tokens listed below. The leading
 * `case` and the trailing `:` are deliberately left off so that the macro is
 * written as `case PM_CASE_OPERATOR:` inside a switch over a token type.
 */
#define PM_CASE_OPERATOR \
         PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15238
/**
 * The case labels for the token types listed below, grouped under the name
 * "primitive". The leading `case` and the trailing `:` are deliberately left
 * off so that the macro is written as `case PM_CASE_PRIMITIVE:` inside a
 * switch over a token type.
 */
#define PM_CASE_PRIMITIVE \
         PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15253
/**
 * The case labels for the token types listed below, grouped under the name
 * "parameter". The leading `case` and the trailing `:` are deliberately left
 * off so that the macro is written as `case PM_CASE_PARAMETER:` inside a
 * switch over a token type.
 */
#define PM_CASE_PARAMETER \
         PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15262
/**
 * The case labels for the node types listed below, grouped under the name
 * "writable". Unlike the token macros, this one is meant for a switch over a
 * node type. The leading `case` and the trailing `:` are deliberately left
 * off so that the macro is written as `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
         PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15271
15272// Assert here that the flags are the same so that we can safely switch the type
15273// of the node without having to move the flags.
15274PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15275
15280static inline pm_node_flags_t
15281parse_unescaped_encoding(const pm_parser_t *parser) {
15282 if (parser->explicit_encoding != NULL) {
15284 // If the there's an explicit encoding and it's using a UTF-8 escape
15285 // sequence, then mark the string as UTF-8.
15286 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15287 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15288 // If there's a non-UTF-8 escape sequence being used, then the
15289 // string uses the source encoding, unless the source is marked as
15290 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15291 // order to keep the string valid.
15292 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15293 }
15294 }
15295 return 0;
15296}
15297
15302static pm_node_t *
15303parse_string_part(pm_parser_t *parser, uint16_t depth) {
15304 switch (parser->current.type) {
15305 // Here the lexer has returned to us plain string content. In this case
15306 // we'll create a string node that has no opening or closing and return that
15307 // as the part. These kinds of parts look like:
15308 //
15309 // "aaa #{bbb} #@ccc ddd"
15310 // ^^^^ ^ ^^^^
15311 case PM_TOKEN_STRING_CONTENT: {
15312 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15313 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15314
15315 parser_lex(parser);
15316 return node;
15317 }
15318 // Here the lexer has returned the beginning of an embedded expression. In
15319 // that case we'll parse the inner statements and return that as the part.
15320 // These kinds of parts look like:
15321 //
15322 // "aaa #{bbb} #@ccc ddd"
15323 // ^^^^^^
15324 case PM_TOKEN_EMBEXPR_BEGIN: {
15325 // Ruby disallows seeing encoding around interpolation in strings,
15326 // even though it is known at parse time.
15327 parser->explicit_encoding = NULL;
15328
15329 pm_lex_state_t state = parser->lex_state;
15330 int brace_nesting = parser->brace_nesting;
15331
15332 parser->brace_nesting = 0;
15333 lex_state_set(parser, PM_LEX_STATE_BEG);
15334 parser_lex(parser);
15335
15336 pm_token_t opening = parser->previous;
15337 pm_statements_node_t *statements = NULL;
15338
15339 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15340 pm_accepts_block_stack_push(parser, true);
15341 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15342 pm_accepts_block_stack_pop(parser);
15343 }
15344
15345 parser->brace_nesting = brace_nesting;
15346 lex_state_set(parser, state);
15347 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15348
15349 // If this set of embedded statements only contains a single
15350 // statement, then Ruby does not consider it as a possible statement
15351 // that could emit a line event.
15352 if (statements != NULL && statements->body.size == 1) {
15353 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15354 }
15355
15356 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15357 }
15358
15359 // Here the lexer has returned the beginning of an embedded variable.
15360 // In that case we'll parse the variable and create an appropriate node
15361 // for it and then return that node. These kinds of parts look like:
15362 //
15363 // "aaa #{bbb} #@ccc ddd"
15364 // ^^^^^
15365 case PM_TOKEN_EMBVAR: {
15366 // Ruby disallows seeing encoding around interpolation in strings,
15367 // even though it is known at parse time.
15368 parser->explicit_encoding = NULL;
15369
15370 lex_state_set(parser, PM_LEX_STATE_BEG);
15371 parser_lex(parser);
15372
15373 pm_token_t operator = parser->previous;
15374 pm_node_t *variable;
15375
15376 switch (parser->current.type) {
15377 // In this case a back reference is being interpolated. We'll
15378 // create a global variable read node.
15379 case PM_TOKEN_BACK_REFERENCE:
15380 parser_lex(parser);
15381 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15382 break;
15383 // In this case an nth reference is being interpolated. We'll
15384 // create a global variable read node.
15385 case PM_TOKEN_NUMBERED_REFERENCE:
15386 parser_lex(parser);
15387 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15388 break;
15389 // In this case a global variable is being interpolated. We'll
15390 // create a global variable read node.
15391 case PM_TOKEN_GLOBAL_VARIABLE:
15392 parser_lex(parser);
15393 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15394 break;
15395 // In this case an instance variable is being interpolated.
15396 // We'll create an instance variable read node.
15397 case PM_TOKEN_INSTANCE_VARIABLE:
15398 parser_lex(parser);
15399 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15400 break;
15401 // In this case a class variable is being interpolated. We'll
15402 // create a class variable read node.
15403 case PM_TOKEN_CLASS_VARIABLE:
15404 parser_lex(parser);
15405 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15406 break;
15407 // We can hit here if we got an invalid token. In that case
15408 // we'll not attempt to lex this token and instead just return a
15409 // missing node.
15410 default:
15411 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15412 variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15413 break;
15414 }
15415
15416 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15417 }
15418 default:
15419 parser_lex(parser);
15420 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15421 return NULL;
15422 }
15423}
15424
15430static const uint8_t *
15431parse_operator_symbol_name(const pm_token_t *name) {
15432 switch (name->type) {
15433 case PM_TOKEN_TILDE:
15434 case PM_TOKEN_BANG:
15435 if (name->end[-1] == '@') return name->end - 1;
15437 default:
15438 return name->end;
15439 }
15440}
15441
15442static pm_node_t *
15443parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15444 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15445 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15446
15447 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15448 parser_lex(parser);
15449
15450 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15451 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15452
15453 return UP(symbol);
15454}
15455
15461static pm_node_t *
15462parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15463 const pm_token_t opening = parser->previous;
15464
15465 if (lex_mode->mode != PM_LEX_STRING) {
15466 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15467
15468 switch (parser->current.type) {
15469 case PM_CASE_OPERATOR:
15470 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15471 case PM_TOKEN_IDENTIFIER:
15472 case PM_TOKEN_CONSTANT:
15473 case PM_TOKEN_INSTANCE_VARIABLE:
15474 case PM_TOKEN_METHOD_NAME:
15475 case PM_TOKEN_CLASS_VARIABLE:
15476 case PM_TOKEN_GLOBAL_VARIABLE:
15477 case PM_TOKEN_NUMBERED_REFERENCE:
15478 case PM_TOKEN_BACK_REFERENCE:
15479 case PM_CASE_KEYWORD:
15480 parser_lex(parser);
15481 break;
15482 default:
15483 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15484 break;
15485 }
15486
15487 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15488 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15489 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15490
15491 return UP(symbol);
15492 }
15493
15494 if (lex_mode->as.string.interpolation) {
15495 // If we have the end of the symbol, then we can return an empty symbol.
15496 if (match1(parser, PM_TOKEN_STRING_END)) {
15497 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15498 parser_lex(parser);
15499 pm_token_t content = {
15500 .type = PM_TOKEN_STRING_CONTENT,
15501 .start = parser->previous.start,
15502 .end = parser->previous.start
15503 };
15504
15505 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15506 }
15507
15508 // Now we can parse the first part of the symbol.
15509 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15510
15511 // If we got a string part, then it's possible that we could transform
15512 // what looks like an interpolated symbol into a regular symbol.
15513 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15514 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15515 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15516
15517 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15518 }
15519
15520 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15521 if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15522
15523 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15524 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15525 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15526 }
15527 }
15528
15529 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15530 if (match1(parser, PM_TOKEN_EOF)) {
15531 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15532 } else {
15533 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15534 }
15535
15536 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15537 return UP(symbol);
15538 }
15539
15540 pm_token_t content;
15541 pm_string_t unescaped;
15542
15543 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15544 content = parser->current;
15545 unescaped = parser->current_string;
15546 parser_lex(parser);
15547
15548 // If we have two string contents in a row, then the content of this
15549 // symbol is split because of heredoc contents. This looks like:
15550 //
15551 // <<A; :'a
15552 // A
15553 // b'
15554 //
15555 // In this case, the best way we have to represent this is as an
15556 // interpolated string node, so that's what we'll do here.
15557 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15558 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15559 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15560 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15561
15562 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15563 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15564
15565 if (next_state != PM_LEX_STATE_NONE) {
15566 lex_state_set(parser, next_state);
15567 }
15568
15569 parser_lex(parser);
15570 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15571
15572 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15573 return UP(symbol);
15574 }
15575 } else {
15576 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15577 pm_string_shared_init(&unescaped, content.start, content.end);
15578 }
15579
15580 if (next_state != PM_LEX_STATE_NONE) {
15581 lex_state_set(parser, next_state);
15582 }
15583
15584 if (match1(parser, PM_TOKEN_EOF)) {
15585 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15586 } else {
15587 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15588 }
15589
15590 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15591}
15592
15597static inline pm_node_t *
15598parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15599 switch (parser->current.type) {
15600 case PM_CASE_OPERATOR:
15601 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
15602 case PM_CASE_KEYWORD:
15603 case PM_TOKEN_CONSTANT:
15604 case PM_TOKEN_IDENTIFIER:
15605 case PM_TOKEN_METHOD_NAME: {
15606 parser_lex(parser);
15607
15608 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15609 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15610 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15611
15612 return UP(symbol);
15613 }
15614 case PM_TOKEN_SYMBOL_BEGIN: {
15615 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15616 parser_lex(parser);
15617
15618 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15619 }
15620 default:
15621 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15622 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15623 }
15624}
15625
15632static inline pm_node_t *
15633parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15634 switch (parser->current.type) {
15635 case PM_CASE_OPERATOR:
15636 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15637 case PM_CASE_KEYWORD:
15638 case PM_TOKEN_CONSTANT:
15639 case PM_TOKEN_IDENTIFIER:
15640 case PM_TOKEN_METHOD_NAME: {
15641 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15642 parser_lex(parser);
15643
15644 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15645 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15646 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15647
15648 return UP(symbol);
15649 }
15650 case PM_TOKEN_SYMBOL_BEGIN: {
15651 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15652 parser_lex(parser);
15653
15654 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15655 }
15656 case PM_TOKEN_BACK_REFERENCE:
15657 parser_lex(parser);
15658 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15659 case PM_TOKEN_NUMBERED_REFERENCE:
15660 parser_lex(parser);
15661 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15662 case PM_TOKEN_GLOBAL_VARIABLE:
15663 parser_lex(parser);
15664 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15665 default:
15666 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15667 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15668 }
15669}
15670
15675static pm_node_t *
15676parse_variable(pm_parser_t *parser) {
15677 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15678 int depth;
15679 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
15680
15681 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15682 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15683 }
15684
15685 pm_scope_t *current_scope = parser->current_scope;
15686 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15687 if (is_numbered_param) {
15688 // When you use a numbered parameter, it implies the existence of
15689 // all of the locals that exist before it. For example, referencing
15690 // _2 means that _1 must exist. Therefore here we loop through all
15691 // of the possibilities and add them into the constant pool.
15692 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15693 for (uint8_t number = 1; number <= maximum; number++) {
15694 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15695 }
15696
15697 if (!match1(parser, PM_TOKEN_EQUAL)) {
15698 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15699 }
15700
15701 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15702 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
15703
15704 return node;
15705 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15706 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15707 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
15708
15709 return node;
15710 }
15711 }
15712
15713 return NULL;
15714}
15715
15719static pm_node_t *
15720parse_variable_call(pm_parser_t *parser) {
15721 pm_node_flags_t flags = 0;
15722
15723 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15724 pm_node_t *node = parse_variable(parser);
15725 if (node != NULL) return node;
15726 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15727 }
15728
15729 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15730 pm_node_flag_set(UP(node), flags);
15731
15732 return UP(node);
15733}
15734
15740static inline pm_token_t
15741parse_method_definition_name(pm_parser_t *parser) {
15742 switch (parser->current.type) {
15743 case PM_CASE_KEYWORD:
15744 case PM_TOKEN_CONSTANT:
15745 case PM_TOKEN_METHOD_NAME:
15746 parser_lex(parser);
15747 return parser->previous;
15748 case PM_TOKEN_IDENTIFIER:
15749 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
15750 parser_lex(parser);
15751 return parser->previous;
15752 case PM_CASE_OPERATOR:
15753 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15754 parser_lex(parser);
15755 return parser->previous;
15756 default:
15757 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15758 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
15759 }
15760}
15761
15762static void
15763parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
15764 // Make a writable copy in the arena if the string isn't already writable.
15765 // We keep a mutable pointer to the arena memory so we can memmove into it
15766 // below without casting away const from the string's source field.
15767 uint8_t *writable;
15768
15769 if (string->type != PM_STRING_OWNED) {
15770 size_t length = pm_string_length(string);
15771 writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
15772 pm_string_constant_init(string, (const char *) writable, length);
15773 } else {
15774 writable = (uint8_t *) string->source;
15775 }
15776
15777 // Now get the bounds of the existing string. We'll use this as a
15778 // destination to move bytes into. We'll also use it for bounds checking
15779 // since we don't require that these strings be null terminated.
15780 size_t dest_length = pm_string_length(string);
15781 const uint8_t *source_cursor = writable;
15782 const uint8_t *source_end = source_cursor + dest_length;
15783
15784 // We're going to move bytes backward in the string when we get leading
15785 // whitespace, so we'll maintain a pointer to the current position in the
15786 // string that we're writing to.
15787 size_t trimmed_whitespace = 0;
15788
15789 // While we haven't reached the amount of common whitespace that we need to
15790 // trim and we haven't reached the end of the string, we'll keep trimming
15791 // whitespace. Trimming in this context means skipping over these bytes such
15792 // that they aren't copied into the new string.
15793 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15794 if (*source_cursor == '\t') {
15795 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15796 if (trimmed_whitespace > common_whitespace) break;
15797 } else {
15798 trimmed_whitespace++;
15799 }
15800
15801 source_cursor++;
15802 dest_length--;
15803 }
15804
15805 memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
15806 string->length = dest_length;
15807}
15808
15813static inline bool
15814heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
15815 if (string_node->unescaped.length == 0) {
15816 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
15817 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
15818 }
15819 return false;
15820}
15821
15825static void
15826parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15827 // The next node should be dedented if it's the first node in the list or if
15828 // it follows a string node.
15829 bool dedent_next = true;
15830
15831 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15832 // keep around two indices: a read and a write.
15833 size_t write_index = 0;
15834
15835 pm_node_t *node;
15836 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15837 // We're not manipulating child nodes that aren't strings. In this case
15838 // we'll skip past it and indicate that the subsequent node should not
15839 // be dedented.
15840 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15841 nodes->nodes[write_index++] = node;
15842 dedent_next = false;
15843 continue;
15844 }
15845
15846 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15847 if (dedent_next) {
15848 parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
15849 }
15850
15851 if (heredoc_dedent_discard_string_node(parser, string_node)) {
15852 } else {
15853 nodes->nodes[write_index++] = node;
15854 }
15855
15856 // We always dedent the next node if it follows a string node.
15857 dedent_next = true;
15858 }
15859
15860 nodes->size = write_index;
15861}
15862
15866static pm_token_t
15867parse_strings_empty_content(const uint8_t *location) {
15868 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15869}
15870
/**
 * Parse one or more adjacent string literals, starting at the current
 * PM_TOKEN_STRING_BEGIN token.
 *
 * Each literal becomes a string node, an interpolated string node, or (when it
 * is closed by PM_TOKEN_LABEL_END) a symbol or interpolated symbol node. When
 * a literal is parsed while a node is already held in `current`, both are
 * appended to a single interpolated string node that acts as the container
 * for the concatenation.
 *
 * `current` is an already-parsed node to concatenate onto, or NULL.
 * `accepts_label` must be true, and the lexer's string mode must also allow
 * labels, for a label terminator to be accepted without reporting
 * PM_ERR_UNEXPECTED_LABEL. `depth` is passed on (plus one) to
 * parse_string_part.
 *
 * If `current` is NULL and the literal just parsed is a symbol or interpolated
 * symbol, that node is returned immediately. Otherwise the accumulated node is
 * returned once the current token is no longer PM_TOKEN_STRING_BEGIN.
 */
15874static inline pm_node_t *
15875parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15876 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
 // Becomes true once `current` has been wrapped in a container node.
15877 bool concating = false;
15878
15879 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15880 pm_node_t *node = NULL;
15881
15882 // Here we have found a string literal. We'll parse it and add it to
15883 // the list of strings.
15884 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15885 assert(lex_mode->mode == PM_LEX_STRING);
 // These are read from the lex mode before the opening token is consumed.
15886 bool lex_interpolation = lex_mode->as.string.interpolation;
15887 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15888
15889 pm_token_t opening = parser->current;
15890 parser_lex(parser);
15891
15892 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15893 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15894 // If we get here, then we have an end immediately after a
15895 // start. In that case we'll create an empty content token and
15896 // return an uninterpolated string.
15897 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15898 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15899
15900 pm_string_shared_init(&string->unescaped, content.start, content.end);
15901 node = UP(string);
15902 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15903 // If we get here, then we have an end of a label immediately
15904 // after a start. In that case we'll create an empty symbol
15905 // node.
15906 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
15907 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
15908 node = UP(symbol);
15909
15910 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15911 } else if (!lex_interpolation) {
15912 // If we don't accept interpolation then we expect the string to
15913 // start with a single string content node.
15914 pm_string_t unescaped;
15915 pm_token_t content;
15916
 // At EOF there is no content token to consume, so an empty
 // unescaped string and a zero-width content token located at the
 // start of the source are used instead.
15917 if (match1(parser, PM_TOKEN_EOF)) {
15918 unescaped = PM_STRING_EMPTY;
15919 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
15920 } else {
 // The unescaped string is copied before expect1 is called.
15921 unescaped = parser->current_string;
15922 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
15923 content = parser->previous;
15924 }
15925
15926 // It is unfortunately possible to have multiple string content
15927 // nodes in a row in the case that there's heredoc content in
15928 // the middle of the string, like this cursed example:
15929 //
15930 // <<-END+'b
15931 // a
15932 // END
15933 // c'+'d'
15934 //
15935 // In that case we need to switch to an interpolated string to
15936 // be able to contain all of the parts.
15937 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15938 pm_node_list_t parts = { 0 };
15939 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15940 pm_node_list_append(parser->arena, &parts, part);
15941
15942 do {
15943 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15944 pm_node_list_append(parser->arena, &parts, part);
15945 parser_lex(parser);
15946 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
15947
15948 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15949 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
15950 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15951 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
15952 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15953 } else if (match1(parser, PM_TOKEN_EOF)) {
 // Unterminated literal: the error is reported on the opening token
 // and the (unconsumed) EOF token is used as the closing.
15954 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
15955 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
15956 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
15957 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
15958 } else {
 // Any other token here is an unexpected terminator. The error is
 // reported against the previous token, which is then collapsed to
 // a zero-width token of type 0 and used as the closing.
15959 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
15960 parser->previous.start = parser->previous.end;
15961 parser->previous.type = 0;
15962 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
15963 }
15964 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15965 // In this case we've hit string content so we know the string
15966 // at least has something in it. We'll need to check if the
15967 // following token is the end (in which case we can return a
15968 // plain string) or if it's not then it has interpolation.
15969 pm_token_t content = parser->current;
15970 pm_string_t unescaped = parser->current_string;
15971 parser_lex(parser);
15972
15973 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
 // The node is created with the not-yet-consumed current token as
 // its closing, whether that token is the terminator or EOF.
15974 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
15975 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15976
15977 // Kind of odd behavior, but basically if we have an
15978 // unterminated string and it ends in a newline, we back up one
15979 // character so that the error message is on the last line of
15980 // content in the string.
15981 if (!accept1(parser, PM_TOKEN_STRING_END)) {
15982 const uint8_t *location = parser->previous.end;
15983 if (location > parser->start && location[-1] == '\n') location--;
15984 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
15985
 // Collapse the previous token to a zero-width token of type 0.
15986 parser->previous.start = parser->previous.end;
15987 parser->previous.type = 0;
15988 }
15989 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15990 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
15991 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15992 } else {
15993 // If we get here, then we have interpolation so we'll need
15994 // to create a string or symbol node with interpolation.
15995 pm_node_list_t parts = { 0 };
15996 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
15997 pm_node_flag_set(part, parse_unescaped_encoding(parser));
15998 pm_node_list_append(parser->arena, &parts, part);
15999
 // Collect the remaining parts; NULL parts are skipped.
16000 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16001 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16002 pm_node_list_append(parser->arena, &parts, part);
16003 }
16004 }
16005
16006 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16007 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16008 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16009 } else if (match1(parser, PM_TOKEN_EOF)) {
16010 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16011 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16012 } else {
16013 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16014 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16015 }
16016 }
16017 } else {
16018 // If we get here, then the first part of the string is not plain
16019 // string content, in which case we need to parse the string as an
16020 // interpolated string.
16021 pm_node_list_t parts = { 0 };
16022 pm_node_t *part;
16023
16024 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16025 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16026 pm_node_list_append(parser->arena, &parts, part);
16027 }
16028 }
16029
 // Same three-way termination handling as the branch above.
16030 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16031 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16032 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16033 } else if (match1(parser, PM_TOKEN_EOF)) {
16034 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16035 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16036 } else {
16037 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16038 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16039 }
16040 }
16041
16042 if (current == NULL) {
16043 // If the node we just parsed is a symbol node, then we can't
16044 // concatenate it with anything else, so we can now return that
16045 // node.
16046 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16047 return node;
16048 }
16049
16050 // If we don't already have a node, then it's fine and we can just
16051 // set the result to be the node we just parsed.
16052 current = node;
16053 } else {
16054 // Otherwise we need to check the type of the node we just parsed.
16055 // If it cannot be concatenated with the previous node, then we'll
16056 // need to add a syntax error.
16057 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16058 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16059 }
16060
16061 // If we haven't already created our container for concatenation,
16062 // we'll do that now.
16063 if (!concating) {
16064 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16065 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16066 }
16067
16068 concating = true;
16069 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16070 pm_interpolated_string_node_append(parser->arena, container, current);
16071 current = UP(container);
16072 }
16073
 // The node is appended even when a concatenation error was reported.
16074 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, node);
16075 }
16076 }
16077
16078 return current;
16079}
16080
// Flag values for the `flags` argument of parse_pattern. TOP and MULTI are
// distinct bits and are OR-ed together by callers in this file; SINGLE is the
// zero value, i.e. no flags set.
// NOTE(review): the precise meaning of each flag is defined by the body of
// parse_pattern, which is not shown here; presumably TOP marks a top-level
// pattern and MULTI permits multiple comma-separated patterns. Confirm there.
16081#define PM_PARSE_PATTERN_SINGLE 0
16082#define PM_PARSE_PATTERN_TOP 1
16083#define PM_PARSE_PATTERN_MULTI 2
16084
// Forward declaration: the pattern helpers that follow call parse_pattern
// before its definition appears.
16085static pm_node_t *
16086parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16087
16093static void
16094parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16095 // Skip this capture if it starts with an underscore.
16096 if (peek_at(parser, parser->start + location->start) == '_') return;
16097
16098 if (pm_constant_id_list_includes(captures, capture)) {
16099 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16100 } else {
16101 pm_constant_id_list_append(parser->arena, captures, capture);
16102 }
16103}
16104
16108static pm_node_t *
16109parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16110 // Now, if there are any :: operators that follow, parse them as constant
16111 // path nodes.
16112 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16113 pm_token_t delimiter = parser->previous;
16114 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16115 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16116 }
16117
16118 // If there is a [ or ( that follows, then this is part of a larger pattern
16119 // expression. We'll parse the inner pattern here, then modify the returned
16120 // inner pattern with our constant path attached.
16121 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16122 return node;
16123 }
16124
16125 pm_token_t opening;
16126 pm_token_t closing;
16127 pm_node_t *inner = NULL;
16128
16129 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16130 opening = parser->previous;
16131 accept1(parser, PM_TOKEN_NEWLINE);
16132
16133 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16134 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16135 accept1(parser, PM_TOKEN_NEWLINE);
16136 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16137 }
16138
16139 closing = parser->previous;
16140 } else {
16141 parser_lex(parser);
16142 opening = parser->previous;
16143 accept1(parser, PM_TOKEN_NEWLINE);
16144
16145 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16146 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16147 accept1(parser, PM_TOKEN_NEWLINE);
16148 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16149 }
16150
16151 closing = parser->previous;
16152 }
16153
16154 if (!inner) {
16155 // If there was no inner pattern, then we have something like Foo() or
16156 // Foo[]. In that case we'll create an array pattern with no requireds.
16157 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16158 }
16159
16160 // Now that we have the inner pattern, check to see if it's an array, find,
16161 // or hash pattern. If it is, then we'll attach our constant path to it if
16162 // it doesn't already have a constant. If it's not one of those node types
16163 // or it does have a constant, then we'll create an array pattern.
16164 switch (PM_NODE_TYPE(inner)) {
16165 case PM_ARRAY_PATTERN_NODE: {
16166 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16167
16168 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16169 PM_NODE_START_SET_NODE(pattern_node, node);
16170 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16171
16172 pattern_node->constant = node;
16173 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16174 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16175
16176 return UP(pattern_node);
16177 }
16178
16179 break;
16180 }
16181 case PM_FIND_PATTERN_NODE: {
16182 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16183
16184 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16185 PM_NODE_START_SET_NODE(pattern_node, node);
16186 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16187
16188 pattern_node->constant = node;
16189 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16190 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16191
16192 return UP(pattern_node);
16193 }
16194
16195 break;
16196 }
16197 case PM_HASH_PATTERN_NODE: {
16198 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16199
16200 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16201 PM_NODE_START_SET_NODE(pattern_node, node);
16202 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16203
16204 pattern_node->constant = node;
16205 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16206 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16207
16208 return UP(pattern_node);
16209 }
16210
16211 break;
16212 }
16213 default:
16214 break;
16215 }
16216
16217 // If we got here, then we didn't return one of the inner patterns by
16218 // attaching its constant. In this case we'll create an array pattern and
16219 // attach our constant to it.
16220 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16221 pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
16222 return UP(pattern_node);
16223}
16224
16228static pm_splat_node_t *
16229parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16230 assert(parser->previous.type == PM_TOKEN_USTAR);
16231 pm_token_t operator = parser->previous;
16232 pm_node_t *name = NULL;
16233
16234 // Rest patterns don't necessarily have a name associated with them. So we
16235 // will check for that here. If they do, then we'll add it to the local
16236 // table since this pattern will cause it to become a local variable.
16237 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16238 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16239
16240 int depth;
16241 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16242 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16243 }
16244
16245 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16246 name = UP(pm_local_variable_target_node_create(
16247 parser,
16248 &TOK2LOC(parser, &parser->previous),
16249 constant_id,
16250 (uint32_t) (depth == -1 ? 0 : depth)
16251 ));
16252 }
16253
16254 // Finally we can return the created node.
16255 return pm_splat_node_create(parser, &operator, name);
16256}
16257
16261static pm_node_t *
16262parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16263 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16264 parser_lex(parser);
16265
16266 pm_token_t operator = parser->previous;
16267 pm_node_t *value = NULL;
16268
16269 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16270 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16271 }
16272
16273 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16274 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16275
16276 int depth;
16277 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16278 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16279 }
16280
16281 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16282 value = UP(pm_local_variable_target_node_create(
16283 parser,
16284 &TOK2LOC(parser, &parser->previous),
16285 constant_id,
16286 (uint32_t) (depth == -1 ? 0 : depth)
16287 ));
16288 }
16289
16290 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16291}
16292
16297static bool
16298pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16299 ptrdiff_t length = end - start;
16300 if (length == 0) return false;
16301
16302 // First ensure that it starts with a valid identifier starting character.
16303 size_t width = char_is_identifier_start(parser, start, end - start);
16304 if (width == 0) return false;
16305
16306 // Next, ensure that it's not an uppercase character.
16307 if (parser->encoding_changed) {
16308 if (parser->encoding->isupper_char(start, length)) return false;
16309 } else {
16310 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16311 }
16312
16313 // Next, iterate through all of the bytes of the string to ensure that they
16314 // are all valid identifier characters.
16315 const uint8_t *cursor = start + width;
16316 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16317 return cursor == end;
16318}
16319
16324static pm_node_t *
16325parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16326 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16327 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16328 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16329
16330 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16331 int depth = -1;
16332
16333 if (pm_slice_is_valid_local(parser, start, end)) {
16334 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16335 } else {
16336 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16337
16338 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16339 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16340 }
16341 }
16342
16343 if (depth == -1) {
16344 pm_parser_local_add(parser, constant_id, start, end, 0);
16345 }
16346
16347 parse_pattern_capture(parser, captures, constant_id, value_loc);
16348 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16349 parser,
16350 value_loc,
16351 constant_id,
16352 (uint32_t) (depth == -1 ? 0 : depth)
16353 );
16354
16355 return UP(pm_implicit_node_create(parser, UP(target)));
16356}
16357
16362static void
16363parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16364 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16365 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16366 }
16367}
16368
16373parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16374 pm_node_list_t assocs = { 0 };
16375 pm_static_literals_t keys = { 0 };
16376 pm_node_t *rest = NULL;
16377
16378 switch (PM_NODE_TYPE(first_node)) {
16379 case PM_ASSOC_SPLAT_NODE:
16380 case PM_NO_KEYWORDS_PARAMETER_NODE:
16381 rest = first_node;
16382 break;
16383 case PM_SYMBOL_NODE: {
16384 if (pm_symbol_node_label_p(parser, first_node)) {
16385 parse_pattern_hash_key(parser, &keys, first_node);
16386 pm_node_t *value;
16387
16388 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16389 // Otherwise, we will create an implicit local variable
16390 // target for the value.
16391 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16392 } else {
16393 // Here we have a value for the first assoc in the list, so
16394 // we will parse it now.
16395 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16396 }
16397
16398 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16399 pm_node_list_append(parser->arena, &assocs, assoc);
16400 break;
16401 }
16402 }
16404 default: {
16405 // If we get anything else, then this is an error. For this we'll
16406 // create a missing node for the value and create an assoc node for
16407 // the first node in the list.
16408 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16409 pm_parser_err_node(parser, first_node, diag_id);
16410
16411 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16412 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16413
16414 pm_node_list_append(parser->arena, &assocs, assoc);
16415 break;
16416 }
16417 }
16418
16419 // If there are any other assocs, then we'll parse them now.
16420 while (accept1(parser, PM_TOKEN_COMMA)) {
16421 // Here we need to break to support trailing commas.
16422 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16423 // Trailing commas are not allowed to follow a rest pattern.
16424 if (rest != NULL) {
16425 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16426 }
16427
16428 break;
16429 }
16430
16431 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16432 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16433
16434 if (rest == NULL) {
16435 rest = assoc;
16436 } else {
16437 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16438 pm_node_list_append(parser->arena, &assocs, assoc);
16439 }
16440 } else {
16441 pm_node_t *key;
16442
16443 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16444 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16445
16446 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16447 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16448 } else if (!pm_symbol_node_label_p(parser, key)) {
16449 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16450 }
16451 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16452 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16453 } else {
16454 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16455
16456 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16457 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16458 }
16459
16460 parse_pattern_hash_key(parser, &keys, key);
16461 pm_node_t *value = NULL;
16462
16463 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16464 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16465 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16466 } else {
16467 value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0));
16468 }
16469 } else {
16470 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16471 }
16472
16473 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16474
16475 if (rest != NULL) {
16476 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16477 }
16478
16479 pm_node_list_append(parser->arena, &assocs, assoc);
16480 }
16481 }
16482
16483 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16484 // assocs.nodes is arena-allocated; no explicit free needed.
16485
16486 pm_static_literals_free(&keys);
16487 return node;
16488}
16489
16493static pm_node_t *
16494parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16495 switch (parser->current.type) {
16496 case PM_TOKEN_IDENTIFIER:
16497 case PM_TOKEN_METHOD_NAME: {
16498 parser_lex(parser);
16499 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16500
16501 int depth;
16502 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16503 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16504 }
16505
16506 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16507 return UP(pm_local_variable_target_node_create(
16508 parser,
16509 &TOK2LOC(parser, &parser->previous),
16510 constant_id,
16511 (uint32_t) (depth == -1 ? 0 : depth)
16512 ));
16513 }
16514 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16515 pm_token_t opening = parser->current;
16516 parser_lex(parser);
16517
16518 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16519 // If we have an empty array pattern, then we'll just return a new
16520 // array pattern node.
16521 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16522 }
16523
16524 // Otherwise, we'll parse the inner pattern, then deal with it depending
16525 // on the type it returns.
16526 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16527
16528 accept1(parser, PM_TOKEN_NEWLINE);
16529 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16530 pm_token_t closing = parser->previous;
16531
16532 switch (PM_NODE_TYPE(inner)) {
16533 case PM_ARRAY_PATTERN_NODE: {
16534 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16535 if (pattern_node->opening_loc.length == 0) {
16536 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16537 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16538
16539 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16540 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16541
16542 return UP(pattern_node);
16543 }
16544
16545 break;
16546 }
16547 case PM_FIND_PATTERN_NODE: {
16548 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16549 if (pattern_node->opening_loc.length == 0) {
16550 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16551 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16552
16553 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16554 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16555
16556 return UP(pattern_node);
16557 }
16558
16559 break;
16560 }
16561 default:
16562 break;
16563 }
16564
16565 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16566 pm_array_pattern_node_requireds_append(parser->arena, node, inner);
16567 return UP(node);
16568 }
16569 case PM_TOKEN_BRACE_LEFT: {
16570 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16571 parser->pattern_matching_newlines = false;
16572
16574 pm_token_t opening = parser->current;
16575 parser_lex(parser);
16576
16577 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16578 // If we have an empty hash pattern, then we'll just return a new hash
16579 // pattern node.
16580 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16581 } else {
16582 pm_node_t *first_node;
16583
16584 switch (parser->current.type) {
16585 case PM_TOKEN_LABEL:
16586 parser_lex(parser);
16587 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16588 break;
16589 case PM_TOKEN_USTAR_STAR:
16590 first_node = parse_pattern_keyword_rest(parser, captures);
16591 break;
16592 case PM_TOKEN_STRING_BEGIN:
16593 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16594 break;
16595 default: {
16596 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16597 parser_lex(parser);
16598
16599 first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
16600 break;
16601 }
16602 }
16603
16604 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16605
16606 accept1(parser, PM_TOKEN_NEWLINE);
16607 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16608 pm_token_t closing = parser->previous;
16609
16610 PM_NODE_START_SET_TOKEN(parser, node, &opening);
16611 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
16612
16613 node->opening_loc = TOK2LOC(parser, &opening);
16614 node->closing_loc = TOK2LOC(parser, &closing);
16615 }
16616
16617 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16618 return UP(node);
16619 }
16620 case PM_TOKEN_UDOT_DOT:
16621 case PM_TOKEN_UDOT_DOT_DOT: {
16622 pm_token_t operator = parser->current;
16623 parser_lex(parser);
16624
16625 // Since we have a unary range operator, we need to parse the subsequent
16626 // expression as the right side of the range.
16627 switch (parser->current.type) {
16628 case PM_CASE_PRIMITIVE: {
16629 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16630 return UP(pm_range_node_create(parser, NULL, &operator, right));
16631 }
16632 default: {
16633 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16634 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16635 return UP(pm_range_node_create(parser, NULL, &operator, right));
16636 }
16637 }
16638 }
16639 case PM_CASE_PRIMITIVE: {
16640 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
16641
16642 // If we found a label, we need to immediately return to the caller.
16643 if (pm_symbol_node_label_p(parser, node)) return node;
16644
16645 // Call nodes (arithmetic operations) are not allowed in patterns
16646 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16647 pm_parser_err_node(parser, node, diag_id);
16648 pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node));
16649
16650 pm_node_unreference(parser, node);
16651 return UP(missing_node);
16652 }
16653
16654 // Now that we have a primitive, we need to check if it's part of a range.
16655 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16656 pm_token_t operator = parser->previous;
16657
16658 // Now that we have the operator, we need to check if this is followed
16659 // by another expression. If it is, then we will create a full range
16660 // node. Otherwise, we'll create an endless range.
16661 switch (parser->current.type) {
16662 case PM_CASE_PRIMITIVE: {
16663 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16664 return UP(pm_range_node_create(parser, node, &operator, right));
16665 }
16666 default:
16667 return UP(pm_range_node_create(parser, node, &operator, NULL));
16668 }
16669 }
16670
16671 return node;
16672 }
16673 case PM_TOKEN_CARET: {
16674 parser_lex(parser);
16675 pm_token_t operator = parser->previous;
16676
16677 // At this point we have a pin operator. We need to check the subsequent
16678 // expression to determine if it's a variable or an expression.
16679 switch (parser->current.type) {
16680 case PM_TOKEN_IDENTIFIER: {
16681 parser_lex(parser);
16682 pm_node_t *variable = UP(parse_variable(parser));
16683
16684 if (variable == NULL) {
16685 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16686 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16687 }
16688
16689 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16690 }
16691 case PM_TOKEN_INSTANCE_VARIABLE: {
16692 parser_lex(parser);
16693 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16694
16695 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16696 }
16697 case PM_TOKEN_CLASS_VARIABLE: {
16698 parser_lex(parser);
16699 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16700
16701 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16702 }
16703 case PM_TOKEN_GLOBAL_VARIABLE: {
16704 parser_lex(parser);
16705 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16706
16707 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16708 }
16709 case PM_TOKEN_NUMBERED_REFERENCE: {
16710 parser_lex(parser);
16711 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16712
16713 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16714 }
16715 case PM_TOKEN_BACK_REFERENCE: {
16716 parser_lex(parser);
16717 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16718
16719 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16720 }
16721 case PM_TOKEN_PARENTHESIS_LEFT: {
16722 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16723 parser->pattern_matching_newlines = false;
16724
16725 pm_token_t lparen = parser->current;
16726 parser_lex(parser);
16727
16728 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16729 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16730
16731 accept1(parser, PM_TOKEN_NEWLINE);
16732 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16733 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16734 }
16735 default: {
16736 // If we get here, then we have a pin operator followed by something
16737 // not understood. We'll create a missing node and return that.
16738 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16739 pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16740 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16741 }
16742 }
16743 }
16744 case PM_TOKEN_UCOLON_COLON: {
16745 pm_token_t delimiter = parser->current;
16746 parser_lex(parser);
16747
16748 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16749 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16750
16751 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16752 }
16753 case PM_TOKEN_CONSTANT: {
16754 pm_token_t constant = parser->current;
16755 parser_lex(parser);
16756
16757 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16758 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16759 }
16760 default:
16761 pm_parser_err_current(parser, diag_id);
16762 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16763 }
16764}
16765
16766static bool
16767parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16768 switch (PM_NODE_TYPE(node)) {
16769 case PM_LOCAL_VARIABLE_TARGET_NODE: {
16770 pm_parser_t *parser = (pm_parser_t *) data;
16771 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16772 return false;
16773 }
16774 default:
16775 return true;
16776 }
16777}
16778
16783static void
16784parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16785 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16786}
16787
16792static pm_node_t *
16793parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16794 pm_node_t *node = first_node;
16795 bool alternation = false;
16796
16797 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16798 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16799 parse_pattern_alternation_error(parser, node);
16800 }
16801
16802 switch (parser->current.type) {
16803 case PM_TOKEN_IDENTIFIER:
16804 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16805 case PM_TOKEN_BRACE_LEFT:
16806 case PM_TOKEN_CARET:
16807 case PM_TOKEN_CONSTANT:
16808 case PM_TOKEN_UCOLON_COLON:
16809 case PM_TOKEN_UDOT_DOT:
16810 case PM_TOKEN_UDOT_DOT_DOT:
16811 case PM_CASE_PRIMITIVE: {
16812 if (!alternation) {
16813 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16814 } else {
16815 pm_token_t operator = parser->previous;
16816 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16817
16818 if (captures->size) parse_pattern_alternation_error(parser, right);
16819 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16820 }
16821
16822 break;
16823 }
16824 case PM_TOKEN_PARENTHESIS_LEFT:
16825 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16826 pm_token_t operator = parser->previous;
16827 pm_token_t opening = parser->current;
16828 parser_lex(parser);
16829
16830 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16831 accept1(parser, PM_TOKEN_NEWLINE);
16832 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16833 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16834
16835 if (!alternation) {
16836 node = right;
16837 } else {
16838 if (captures->size) parse_pattern_alternation_error(parser, right);
16839 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16840 }
16841
16842 break;
16843 }
16844 default: {
16845 pm_parser_err_current(parser, diag_id);
16846 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16847
16848 if (!alternation) {
16849 node = right;
16850 } else {
16851 if (captures->size) parse_pattern_alternation_error(parser, right);
16852 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16853 }
16854
16855 break;
16856 }
16857 }
16858 }
16859
16860 // If we have an =>, then we are assigning this pattern to a variable.
16861 // In this case we should create an assignment node.
16862 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16863 pm_token_t operator = parser->previous;
16864 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16865
16866 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16867 int depth;
16868
16869 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16870 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16871 }
16872
16873 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16874 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16875 parser,
16876 &TOK2LOC(parser, &parser->previous),
16877 constant_id,
16878 (uint32_t) (depth == -1 ? 0 : depth)
16879 );
16880
16881 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16882 }
16883
16884 return node;
16885}
16886
16890static pm_node_t *
16891parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16892 pm_node_t *node = NULL;
16893
16894 bool leading_rest = false;
16895 bool trailing_rest = false;
16896
16897 switch (parser->current.type) {
16898 case PM_TOKEN_LABEL: {
16899 parser_lex(parser);
16900 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16901 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16902
16903 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16904 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16905 }
16906
16907 return node;
16908 }
16909 case PM_TOKEN_USTAR_STAR: {
16910 node = parse_pattern_keyword_rest(parser, captures);
16911 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16912
16913 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16914 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16915 }
16916
16917 return node;
16918 }
16919 case PM_TOKEN_STRING_BEGIN: {
16920 // We need special handling for string beginnings because they could
16921 // be dynamic symbols leading to hash patterns.
16922 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16923
16924 if (pm_symbol_node_label_p(parser, node)) {
16925 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16926
16927 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16928 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16929 }
16930
16931 return node;
16932 }
16933
16934 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
16935 break;
16936 }
16937 case PM_TOKEN_USTAR: {
16938 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16939 parser_lex(parser);
16940 node = UP(parse_pattern_rest(parser, captures));
16941 leading_rest = true;
16942 break;
16943 }
16944 }
16946 default:
16947 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
16948 break;
16949 }
16950
16951 // If we got a dynamic label symbol, then we need to treat it like the
16952 // beginning of a hash pattern.
16953 if (pm_symbol_node_label_p(parser, node)) {
16954 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16955 }
16956
16957 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16958 // If we have a comma, then we are now parsing either an array pattern
16959 // or a find pattern. We need to parse all of the patterns, put them
16960 // into a big list, and then determine which type of node we have.
16961 pm_node_list_t nodes = { 0 };
16962 pm_node_list_append(parser->arena, &nodes, node);
16963
16964 // Gather up all of the patterns into the list.
16965 while (accept1(parser, PM_TOKEN_COMMA)) {
16966 // Break early here in case we have a trailing comma.
16967 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
16968 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
16969 pm_node_list_append(parser->arena, &nodes, node);
16970 trailing_rest = true;
16971 break;
16972 }
16973
16974 if (accept1(parser, PM_TOKEN_USTAR)) {
16975 node = UP(parse_pattern_rest(parser, captures));
16976
16977 // If we have already parsed a splat pattern, then this is an
16978 // error. We will continue to parse the rest of the patterns,
16979 // but we will indicate it as an error.
16980 if (trailing_rest) {
16981 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
16982 }
16983
16984 trailing_rest = true;
16985 } else {
16986 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
16987 }
16988
16989 pm_node_list_append(parser->arena, &nodes, node);
16990 }
16991
16992 // If the first pattern and the last pattern are rest patterns, then we
16993 // will call this a find pattern, regardless of how many rest patterns
16994 // are in between because we know we already added the appropriate
16995 // errors. Otherwise we will create an array pattern.
16996 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
16997 node = UP(pm_find_pattern_node_create(parser, &nodes));
16998
16999 if (nodes.size == 2) {
17000 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17001 }
17002 } else {
17003 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17004
17005 if (leading_rest && trailing_rest) {
17006 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17007 }
17008 }
17009
17010 // nodes.nodes is arena-allocated; no explicit free needed.
17011 } else if (leading_rest) {
17012 // Otherwise, if we parsed a single splat pattern, then we know we have
17013 // an array pattern, so we can go ahead and create that node.
17014 node = UP(pm_array_pattern_node_rest_create(parser, node));
17015 }
17016
17017 return node;
17018}
17019
17025static inline void
17026parse_negative_numeric(pm_node_t *node) {
17027 switch (PM_NODE_TYPE(node)) {
17028 case PM_INTEGER_NODE: {
17029 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17030 cast->base.location.start--;
17031 cast->base.location.length++;
17032 cast->value.negative = true;
17033 break;
17034 }
17035 case PM_FLOAT_NODE: {
17036 pm_float_node_t *cast = (pm_float_node_t *) node;
17037 cast->base.location.start--;
17038 cast->base.location.length++;
17039 cast->value = -cast->value;
17040 break;
17041 }
17042 case PM_RATIONAL_NODE: {
17043 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17044 cast->base.location.start--;
17045 cast->base.location.length++;
17046 cast->numerator.negative = true;
17047 break;
17048 }
17049 case PM_IMAGINARY_NODE:
17050 node->location.start--;
17051 node->location.length++;
17052 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17053 break;
17054 default:
17055 assert(false && "unreachable");
17056 break;
17057 }
17058}
17059
17065static void
17066pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17067 switch (diag_id) {
17068 case PM_ERR_HASH_KEY: {
17069 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17070 break;
17071 }
17072 case PM_ERR_HASH_VALUE:
17073 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17074 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17075 break;
17076 }
17077 case PM_ERR_UNARY_RECEIVER: {
17078 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17079 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17080 break;
17081 }
17082 case PM_ERR_UNARY_DISALLOWED:
17083 case PM_ERR_EXPECT_ARGUMENT: {
17084 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17085 break;
17086 }
17087 default:
17088 pm_parser_err_previous(parser, diag_id);
17089 break;
17090 }
17091}
17092
/**
 * Validate that a `retry` (represented by `node`) appears somewhere it is
 * permitted, reporting a diagnostic on the parser when it is not.
 *
 * The function walks the parser's context list starting at
 * parser->current_context and following ->prev links:
 *
 * - On reaching one of the "good" contexts it returns without reporting
 *   anything.
 * - On reaching one of the "bad" contexts it reports exactly one error on
 *   `node` and returns. Which error is chosen depends on `context`, which
 *   remembers the most recent else/ensure arm that was walked through on the
 *   way up (CONTEXT_THROUGH_ELSE / CONTEXT_THROUGH_ENSURE), or CONTEXT_NONE
 *   if there was none.
 * - Any other context is skipped and the walk continues with the parent.
 *
 * If the list is exhausted without hitting a good or bad context, nothing is
 * reported.
 *
 * NOTE(review): several case labels appear to be elided from this listing
 * (for example, the arms that assign CONTEXT_THROUGH_ELSE and
 * CONTEXT_THROUGH_ENSURE have no visible label) -- confirm against the full
 * source before relying on the exact set of contexts in each arm.
 *
 * @param parser The parser whose context list is inspected and on which
 *     errors are reported.
 * @param node The node the error is attached to.
 */
17096static void
17097parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17098#define CONTEXT_NONE 0
17099#define CONTEXT_THROUGH_ENSURE 1
17100#define CONTEXT_THROUGH_ELSE 2
17101
17102 pm_context_node_t *context_node = parser->current_context;
17103 int context = CONTEXT_NONE;
17104
17105 while (context_node != NULL) {
17106 switch (context_node->context) {
17114 case PM_CONTEXT_DEFINED:
17116 // These are the good cases. We're allowed to have a retry here.
17117 return;
17118 case PM_CONTEXT_CLASS:
17119 case PM_CONTEXT_DEF:
17121 case PM_CONTEXT_MAIN:
17122 case PM_CONTEXT_MODULE:
17123 case PM_CONTEXT_PREEXE:
17124 case PM_CONTEXT_SCLASS:
17125 // These are the bad cases. We're not allowed to have a retry in
17126 // these contexts. The error is picked from whichever else/ensure
17127 // arm (if any) was crossed while walking up to this context.
17127 if (context == CONTEXT_NONE) {
17128 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17129 } else if (context == CONTEXT_THROUGH_ENSURE) {
17130 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17131 } else if (context == CONTEXT_THROUGH_ELSE) {
17132 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17133 }
17134 return;
17142 // These are also bad cases, but with a more specific error
17143 // message indicating the else.
17144 context = CONTEXT_THROUGH_ELSE;
17145 break;
17153 // These are also bad cases, but with a more specific error
17154 // message indicating the ensure.
17155 context = CONTEXT_THROUGH_ENSURE;
17156 break;
17157 case PM_CONTEXT_NONE:
17158 // This case should never happen.
17159 assert(false && "unreachable");
17160 break;
17161 case PM_CONTEXT_BEGIN:
17165 case PM_CONTEXT_CASE_IN:
17168 case PM_CONTEXT_ELSE:
17169 case PM_CONTEXT_ELSIF:
17170 case PM_CONTEXT_EMBEXPR:
17172 case PM_CONTEXT_FOR:
17173 case PM_CONTEXT_IF:
17178 case PM_CONTEXT_PARENS:
17179 case PM_CONTEXT_POSTEXE:
17181 case PM_CONTEXT_TERNARY:
17182 case PM_CONTEXT_UNLESS:
17183 case PM_CONTEXT_UNTIL:
17184 case PM_CONTEXT_WHILE:
17185 // In these contexts we should continue walking up the list of
17186 // contexts.
17187 break;
17188 }
17189
17190 context_node = context_node->prev;
17191 }
17192
// Undefine exactly the names defined at the top of this function so that the
// function-local constants do not leak into the rest of the translation unit.
17193#undef CONTEXT_NONE
17194#undef CONTEXT_THROUGH_ENSURE
17195#undef CONTEXT_THROUGH_ELSE
17196}
17197
/**
 * Validate that a `yield` (represented by `node`) appears somewhere it is
 * permitted, reporting PM_ERR_INVALID_YIELD on the parser when it is not.
 *
 * The function walks the parser's context list starting at
 * parser->current_context and following ->prev links. It stops at the first
 * context that decides the question: a "good" context returns silently, a
 * "bad" context reports the error on `node` and returns. Every other context
 * is skipped. If the list is exhausted, nothing is reported.
 *
 * NOTE(review): some case labels appear to be elided from this listing --
 * confirm the exact set of contexts in each arm against the full source.
 *
 * @param parser The parser whose context list is inspected and on which the
 *     error is reported.
 * @param node The node the error is attached to.
 */
17201static void
17202parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17203 pm_context_node_t *context_node = parser->current_context;
17204
17205 while (context_node != NULL) {
17206 switch (context_node->context) {
17207 case PM_CONTEXT_DEF:
17209 case PM_CONTEXT_DEFINED:
17213 // These are the good cases. We're allowed to have a yield in
17214 // these contexts.
17215 return;
17216 case PM_CONTEXT_CLASS:
17220 case PM_CONTEXT_MAIN:
17221 case PM_CONTEXT_MODULE:
17225 case PM_CONTEXT_SCLASS:
17229 // These are the bad cases. We're not allowed to have a yield in
17230 // these contexts.
17231 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17232 return;
17233 case PM_CONTEXT_NONE:
17234 // This case should never happen.
17235 assert(false && "unreachable");
17236 break;
17237 case PM_CONTEXT_BEGIN:
17247 case PM_CONTEXT_CASE_IN:
17250 case PM_CONTEXT_ELSE:
17251 case PM_CONTEXT_ELSIF:
17252 case PM_CONTEXT_EMBEXPR:
17254 case PM_CONTEXT_FOR:
17255 case PM_CONTEXT_IF:
17263 case PM_CONTEXT_PARENS:
17264 case PM_CONTEXT_POSTEXE:
17266 case PM_CONTEXT_PREEXE:
17268 case PM_CONTEXT_TERNARY:
17269 case PM_CONTEXT_UNLESS:
17270 case PM_CONTEXT_UNTIL:
17271 case PM_CONTEXT_WHILE:
17272 // In these contexts we should continue walking up the list of
17273 // contexts.
17274 break;
17275 }
17276
17277 context_node = context_node->prev;
17278 }
17279}
17280
17285static inline bool
17286pm_call_node_command_p(const pm_call_node_t *node) {
17287 return (
17288 (node->opening_loc.length == 0) &&
17289 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
17290 (node->arguments != NULL || node->block != NULL)
17291 );
17292}
17293
17299static bool
17300pm_command_call_value_p(const pm_node_t *node) {
17301 switch (PM_NODE_TYPE(node)) {
17302 case PM_CALL_NODE: {
17303 const pm_call_node_t *call = (const pm_call_node_t *) node;
17304
17305 // Command-style calls (e.g., foo bar, obj.foo bar).
17306 // Attribute writes (e.g., a.b = 1) are not commands.
17307 if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
17308 return true;
17309 }
17310
17311 // A `!` or `not` prefix wrapping a command call (e.g.,
17312 // `!foo bar`, `not foo bar`) is also a command-call value.
17313 if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
17314 return pm_command_call_value_p(call->receiver);
17315 }
17316
17317 return false;
17318 }
17319 case PM_SUPER_NODE: {
17320 const pm_super_node_t *cast = (const pm_super_node_t *) node;
17321 return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
17322 }
17323 case PM_YIELD_NODE: {
17324 const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
17325 return cast->lparen_loc.length == 0 && cast->arguments != NULL;
17326 }
17327 case PM_RESCUE_MODIFIER_NODE:
17328 return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
17329 case PM_DEF_NODE: {
17330 const pm_def_node_t *cast = (const pm_def_node_t *) node;
17331 if (cast->equal_loc.length > 0 && cast->body != NULL) {
17332 const pm_node_t *body = cast->body;
17333 if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
17334 body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1];
17335 }
17336 return pm_command_call_value_p(body);
17337 }
17338 return false;
17339 }
17340 default:
17341 return false;
17342 }
17343}
17344
17351static bool
17352pm_block_call_p(const pm_node_t *node) {
17353 while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17354 const pm_call_node_t *call = (const pm_call_node_t *) node;
17355 if (call->opening_loc.length > 0) return false;
17356
17357 // Root: command with do-block (e.g., `foo bar do end`).
17358 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
17359 return true;
17360 }
17361
17362 // Walk up the receiver chain (e.g., `foo bar do end.baz`).
17363 if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
17364 node = call->receiver;
17365 continue;
17366 }
17367
17368 return false;
17369 }
17370
17371 return false;
17372}
17373
17377static inline pm_node_t *
17378parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17379 switch (parser->current.type) {
17380 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17381 parser_lex(parser);
17382
17383 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17384 pm_accepts_block_stack_push(parser, true);
17385 bool parsed_bare_hash = false;
17386
17387 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17388 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17389
17390 // Handle the case where we don't have a comma and we have a
17391 // newline followed by a right bracket.
17392 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17393 break;
17394 }
17395
17396 // Ensure that we have a comma between elements in the array.
17397 if (array->elements.size > 0) {
17398 if (accept1(parser, PM_TOKEN_COMMA)) {
17399 // If there was a comma but we also accepted a newline,
17400 // then this is a syntax error.
17401 if (accepted_newline) {
17402 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17403 }
17404 } else {
17405 // If there was no comma, then we need to add a syntax
17406 // error.
17407 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17408 parser->previous.start = parser->previous.end;
17409 parser->previous.type = 0;
17410 }
17411 }
17412
17413 // If we have a right bracket immediately following a comma,
17414 // this is allowed since it's a trailing comma. In this case we
17415 // can break out of the loop.
17416 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17417
17418 pm_node_t *element;
17419
17420 if (accept1(parser, PM_TOKEN_USTAR)) {
17421 pm_token_t operator = parser->previous;
17422 pm_node_t *expression = NULL;
17423
17424 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17425 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17426 } else {
17427 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17428 }
17429
17430 element = UP(pm_splat_node_create(parser, &operator, expression));
17431 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17432 if (parsed_bare_hash) {
17433 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17434 }
17435
17436 element = UP(pm_keyword_hash_node_create(parser));
17437 pm_static_literals_t hash_keys = { 0 };
17438
17439 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17440 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17441 }
17442
17443 pm_static_literals_free(&hash_keys);
17444 parsed_bare_hash = true;
17445 } else {
17446 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17447
17448 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17449 if (parsed_bare_hash) {
17450 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17451 }
17452
17453 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17454 pm_static_literals_t hash_keys = { 0 };
17455 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17456
17457 pm_token_t operator = { 0 };
17458 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17459 operator = parser->previous;
17460 }
17461
17462 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17463 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
17464 pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
17465
17466 element = UP(hash);
17467 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17468 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17469 }
17470
17471 pm_static_literals_free(&hash_keys);
17472 parsed_bare_hash = true;
17473 }
17474 }
17475
17476 pm_array_node_elements_append(parser->arena, array, element);
17477 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17478 }
17479
17480 accept1(parser, PM_TOKEN_NEWLINE);
17481
17482 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17483 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17484 parser->previous.start = parser->previous.end;
17485 parser->previous.type = 0;
17486 }
17487
17488 pm_array_node_close_set(parser, array, &parser->previous);
17489 pm_accepts_block_stack_pop(parser);
17490
17491 return UP(array);
17492 }
17493 case PM_TOKEN_PARENTHESIS_LEFT:
17494 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17495 pm_token_t opening = parser->current;
17496 pm_node_flags_t paren_flags = 0;
17497
17498 pm_node_list_t current_block_exits = { 0 };
17499 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17500
17501 parser_lex(parser);
17502 while (true) {
17503 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17504 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17505 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17506 break;
17507 }
17508 }
17509
17510 // If this is the end of the file or we match a right parenthesis, then
17511 // we have an empty parentheses node, and we can immediately return.
17512 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17513 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17514 pop_block_exits(parser, previous_block_exits);
17515 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
17516 }
17517
17518 // Otherwise, we're going to parse the first statement in the list
17519 // of statements within the parentheses.
17520 pm_accepts_block_stack_push(parser, true);
17521 context_push(parser, PM_CONTEXT_PARENS);
17522 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17523 context_pop(parser);
17524
17525 // Determine if this statement is followed by a terminator. In the
17526 // case of a single statement, this is fine. But in the case of
17527 // multiple statements it's required.
17528 bool terminator_found = false;
17529
17530 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17531 terminator_found = true;
17532 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17533 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17534 terminator_found = true;
17535 }
17536
17537 if (terminator_found) {
17538 while (true) {
17539 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17540 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17541 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17542 break;
17543 }
17544 }
17545 }
17546
17547 // If we hit a right parenthesis, then we're done parsing the
17548 // parentheses node, and we can check which kind of node we should
17549 // return.
17550 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17551 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17552 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17553 }
17554
17555 parser_lex(parser);
17556 pm_accepts_block_stack_pop(parser);
17557 pop_block_exits(parser, previous_block_exits);
17558
17559 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17560 // If we have a single statement and are ending on a right
17561 // parenthesis, then we need to check if this is possibly a
17562 // multiple target node.
17563 pm_multi_target_node_t *multi_target;
17564
17565 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
17566 multi_target = (pm_multi_target_node_t *) statement;
17567 } else {
17568 multi_target = pm_multi_target_node_create(parser);
17569 pm_multi_target_node_targets_append(parser, multi_target, statement);
17570 }
17571
17572 multi_target->lparen_loc = TOK2LOC(parser, &opening);
17573 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
17574 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
17575 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
17576
17577 pm_node_t *result;
17578 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17579 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17580 accept1(parser, PM_TOKEN_NEWLINE);
17581 } else {
17582 result = UP(multi_target);
17583 }
17584
17585 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17586 // All set, this is explicitly allowed by the parent
17587 // context.
17588 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17589 // All set, we're inside a for loop and we're parsing
17590 // multiple targets.
17591 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17592 // Multi targets are not allowed when it's not a
17593 // statement level.
17594 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17595 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17596 // Multi targets must be followed by an equal sign in
17597 // order to be valid (or a right parenthesis if they are
17598 // nested).
17599 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17600 }
17601
17602 return result;
17603 }
17604
17605 // If we have a single statement and are ending on a right parenthesis
17606 // and we didn't return a multiple assignment node, then we can return a
17607 // regular parentheses node now.
17608 pm_statements_node_t *statements = pm_statements_node_create(parser);
17609 pm_statements_node_body_append(parser, statements, statement, true);
17610
17611 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
17612 }
17613
17614 // If we have more than one statement in the set of parentheses,
17615 // then we are going to parse all of them as a list of statements.
17616 // We'll do that here.
17617 context_push(parser, PM_CONTEXT_PARENS);
17618 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17619
17620 pm_statements_node_t *statements = pm_statements_node_create(parser);
17621 pm_statements_node_body_append(parser, statements, statement, true);
17622
17623 // If we didn't find a terminator and we didn't find a right
17624 // parenthesis, then this is a syntax error.
17625 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17626 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17627 }
17628
17629 // Parse each statement within the parentheses.
17630 while (true) {
17631 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17632 pm_statements_node_body_append(parser, statements, node, true);
17633
17634 // If we're recovering from a syntax error, then we need to stop
17635 // parsing the statements now.
17636 if (parser->recovering) {
17637 // If this is the level of context where the recovery has
17638 // happened, then we can mark the parser as done recovering.
17639 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17640 break;
17641 }
17642
17643 // If we couldn't parse an expression at all, then we need to
17644 // bail out of the loop.
17645 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17646
17647 // If we successfully parsed a statement, then we are going to
17648 // need a terminator to delimit them.
17649 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17650 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17651 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17652 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17653 break;
17654 } else if (!match1(parser, PM_TOKEN_EOF)) {
17655 // If we're at the end of the file, then we're going to add
17656 // an error after this for the ) anyway.
17657 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17658 }
17659 }
17660
17661 context_pop(parser);
17662 pm_accepts_block_stack_pop(parser);
17663 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17664
17665 // When we're parsing multi targets, we allow them to be followed by
17666 // a right parenthesis if they are at the statement level. This is
17667 // only possible if they are the final statement in a parentheses.
17668 // We need to explicitly reject that here.
17669 {
17670 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17671
17672 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17673 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17674 pm_multi_target_node_targets_append(parser, multi_target, statement);
17675
17676 statement = UP(multi_target);
17677 statements->body.nodes[statements->body.size - 1] = statement;
17678 }
17679
17680 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17681 const uint8_t *offset = parser->start + PM_NODE_END(statement);
17682 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17683 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0));
17684
17685 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17686 statements->body.nodes[statements->body.size - 1] = statement;
17687
17688 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17689 }
17690 }
17691
17692 pop_block_exits(parser, previous_block_exits);
17693 pm_void_statements_check(parser, statements, true);
17694 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
17695 }
17696 case PM_TOKEN_BRACE_LEFT: {
17697 // If we were passed a current_hash_keys via the parser, then that
17698 // means we're already parsing a hash and we want to share the set
17699 // of hash keys with this inner hash we're about to parse for the
17700 // sake of warnings. We'll set it to NULL after we grab it to make
17701 // sure subsequent expressions don't use it. Effectively this is a
17702 // way of getting around passing it to every call to
17703 // parse_expression.
17704 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17705 parser->current_hash_keys = NULL;
17706
17707 pm_accepts_block_stack_push(parser, true);
17708 parser_lex(parser);
17709
17710 pm_token_t opening = parser->previous;
17711 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
17712
17713 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17714 if (current_hash_keys != NULL) {
17715 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17716 } else {
17717 pm_static_literals_t hash_keys = { 0 };
17718 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17719 pm_static_literals_free(&hash_keys);
17720 }
17721
17722 accept1(parser, PM_TOKEN_NEWLINE);
17723 }
17724
17725 pm_accepts_block_stack_pop(parser);
17726 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17727 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
17728
17729 return UP(node);
17730 }
17731 case PM_TOKEN_CHARACTER_LITERAL: {
17732 pm_node_t *node = UP(pm_string_node_create_current_string(
17733 parser,
17734 &(pm_token_t) {
17735 .type = PM_TOKEN_STRING_BEGIN,
17736 .start = parser->current.start,
17737 .end = parser->current.start + 1
17738 },
17739 &(pm_token_t) {
17740 .type = PM_TOKEN_STRING_CONTENT,
17741 .start = parser->current.start + 1,
17742 .end = parser->current.end
17743 },
17744 NULL
17745 ));
17746
17747 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17748
17749 // Skip past the character literal here, since now we have handled
17750 // parser->explicit_encoding correctly.
17751 parser_lex(parser);
17752
17753 // Characters can be followed by strings in which case they are
17754 // automatically concatenated.
17755 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17756 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17757 }
17758
17759 return node;
17760 }
17761 case PM_TOKEN_CLASS_VARIABLE: {
17762 parser_lex(parser);
17763 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17764
17765 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17766 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17767 }
17768
17769 return node;
17770 }
17771 case PM_TOKEN_CONSTANT: {
17772 parser_lex(parser);
17773 pm_token_t constant = parser->previous;
17774
17775 // If a constant is immediately followed by parentheses, then this is in
17776 // fact a method call, not a constant read.
17777 if (
17778 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17779 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17780 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17781 match1(parser, PM_TOKEN_BRACE_LEFT)
17782 ) {
17783 pm_arguments_t arguments = { 0 };
17784 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
17785 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17786 }
17787
17788 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17789
17790 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17791 // If we get here, then we have a comma immediately following a
17792 // constant, so we're going to parse this as a multiple assignment.
17793 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17794 }
17795
17796 return node;
17797 }
17798 case PM_TOKEN_UCOLON_COLON: {
17799 parser_lex(parser);
17800 pm_token_t delimiter = parser->previous;
17801
17802 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17803 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17804
17805 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17806 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17807 }
17808
17809 return node;
17810 }
17811 case PM_TOKEN_UDOT_DOT:
17812 case PM_TOKEN_UDOT_DOT_DOT: {
17813 pm_token_t operator = parser->current;
17814 parser_lex(parser);
17815
17816 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17817
17818 // Unary .. and ... are special because these are non-associative
17819 // operators that can also be unary operators. In this case we need
17820 // to explicitly reject code that has a .. or ... that follows this
17821 // expression.
17822 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17823 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17824 }
17825
17826 return UP(pm_range_node_create(parser, NULL, &operator, right));
17827 }
17828 case PM_TOKEN_FLOAT:
17829 parser_lex(parser);
17830 return UP(pm_float_node_create(parser, &parser->previous));
17831 case PM_TOKEN_FLOAT_IMAGINARY:
17832 parser_lex(parser);
17833 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17834 case PM_TOKEN_FLOAT_RATIONAL:
17835 parser_lex(parser);
17836 return UP(pm_float_node_rational_create(parser, &parser->previous));
17837 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17838 parser_lex(parser);
17839 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17840 case PM_TOKEN_NUMBERED_REFERENCE: {
17841 parser_lex(parser);
17842 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17843
17844 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17845 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17846 }
17847
17848 return node;
17849 }
17850 case PM_TOKEN_GLOBAL_VARIABLE: {
17851 parser_lex(parser);
17852 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17853
17854 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17855 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17856 }
17857
17858 return node;
17859 }
17860 case PM_TOKEN_BACK_REFERENCE: {
17861 parser_lex(parser);
17862 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17863
17864 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17865 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17866 }
17867
17868 return node;
17869 }
17870 case PM_TOKEN_IDENTIFIER:
17871 case PM_TOKEN_METHOD_NAME: {
17872 parser_lex(parser);
17873 pm_token_t identifier = parser->previous;
17874 pm_node_t *node = parse_variable_call(parser);
17875
17876 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17877 // If parse_variable_call returned with a call node, then we
17878 // know the identifier is not in the local table. In that case
17879 // we need to check if there are arguments following the
17880 // identifier.
17881 pm_call_node_t *call = (pm_call_node_t *) node;
17882 pm_arguments_t arguments = { 0 };
17883
17884 if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
17885 // Since we found arguments, we need to turn off the
17886 // variable call bit in the flags.
17887 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17888
17889 call->opening_loc = arguments.opening_loc;
17890 call->arguments = arguments.arguments;
17891 call->closing_loc = arguments.closing_loc;
17892 call->block = arguments.block;
17893
17894 const pm_location_t *end = pm_arguments_end(&arguments);
17895 if (end == NULL) {
17896 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
17897 } else {
17898 PM_NODE_LENGTH_SET_LOCATION(call, end);
17899 }
17900 }
17901 } else {
17902 // Otherwise, we know the identifier is in the local table. This
17903 // can still be a method call if it is followed by arguments or
17904 // a block, so we need to check for that here.
17905 if (
17906 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17907 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17908 match1(parser, PM_TOKEN_BRACE_LEFT)
17909 ) {
17910 pm_arguments_t arguments = { 0 };
17911 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
17912 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17913
17914 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17915 // If we're about to convert an 'it' implicit local
17916 // variable read into a method call, we need to remove
17917 // it from the list of implicit local variables.
17918 pm_node_unreference(parser, node);
17919 } else {
17920 // Otherwise, we're about to convert a regular local
17921 // variable read into a method call, in which case we
17922 // need to indicate that this was not a read for the
17923 // purposes of warnings.
17924 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17925
17926 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
17927 pm_node_unreference(parser, node);
17928 } else {
17930 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17931 }
17932 }
17933
17934 return UP(fcall);
17935 }
17936 }
17937
17938 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17939 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17940 }
17941
17942 return node;
17943 }
17944 case PM_TOKEN_HEREDOC_START: {
17945 // Here we have found a heredoc. We'll parse it and add it to the
17946 // list of strings.
17947 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
17948 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
17949
17950 size_t common_whitespace = (size_t) -1;
17951 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
17952
17953 parser_lex(parser);
17954 pm_token_t opening = parser->previous;
17955
17956 pm_node_t *node;
17957 pm_node_t *part;
17958
17959 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17960 // If we get here, then we have an empty heredoc. We'll create
17961 // an empty content token and return an empty string node.
17962 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
17963 pm_token_t content = parse_strings_empty_content(parser->previous.start);
17964
17965 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
17966 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
17967 } else {
17968 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
17969 }
17970
17971 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
17972 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
17973 // If we get here, then we tried to find something in the
17974 // heredoc but couldn't actually parse anything, so we'll just
17975 // return a missing node.
17976 //
17977 // parse_string_part handles its own errors, so there is no need
17978 // for us to add one here.
17979 node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
17980 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17981 // If we get here, then the part that we parsed was plain string
17982 // content and we're at the end of the heredoc, so we can return
17983 // just a string node with the heredoc opening and closing as
17984 // its opening and closing.
17985 pm_node_flag_set(part, parse_unescaped_encoding(parser));
17986 pm_string_node_t *cast = (pm_string_node_t *) part;
17987
17988 cast->opening_loc = TOK2LOC(parser, &opening);
17989 cast->closing_loc = TOK2LOC(parser, &parser->current);
17990 cast->base.location = cast->opening_loc;
17991
17992 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
17993 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
17994 cast->base.type = PM_X_STRING_NODE;
17995 }
17996
17997 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
17998 parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
17999 }
18000
18001 node = UP(cast);
18002 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18003 } else {
18004 // If we get here, then we have multiple parts in the heredoc,
18005 // so we'll need to create an interpolated string node to hold
18006 // them all.
18007 pm_node_list_t parts = { 0 };
18008 pm_node_list_append(parser->arena, &parts, part);
18009
18010 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18011 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18012 pm_node_list_append(parser->arena, &parts, part);
18013 }
18014 }
18015
18016 // Now that we have all of the parts, create the correct type of
18017 // interpolated node.
18018 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18019 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18020 cast->parts = parts;
18021
18022 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18023 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
18024
18025 cast->base.location = cast->opening_loc;
18026 node = UP(cast);
18027 } else {
18028 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18029
18030 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18031 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
18032
18033 cast->base.location = cast->opening_loc;
18034 node = UP(cast);
18035 }
18036
18037 // If this is a heredoc that is indented with a ~, then we need
18038 // to dedent each line by the common leading whitespace.
18039 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18040 pm_node_list_t *nodes;
18041 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18042 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18043 } else {
18044 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18045 }
18046
18047 parse_heredoc_dedent(parser, nodes, common_whitespace);
18048 }
18049 }
18050
18051 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18052 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18053 }
18054
18055 return node;
18056 }
18057 case PM_TOKEN_INSTANCE_VARIABLE: {
18058 parser_lex(parser);
18059 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18060
18061 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18062 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18063 }
18064
18065 return node;
18066 }
18067 case PM_TOKEN_INTEGER: {
18068 pm_node_flags_t base = parser->integer_base;
18069 parser_lex(parser);
18070 return UP(pm_integer_node_create(parser, base, &parser->previous));
18071 }
18072 case PM_TOKEN_INTEGER_IMAGINARY: {
18073 pm_node_flags_t base = parser->integer_base;
18074 parser_lex(parser);
18075 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18076 }
18077 case PM_TOKEN_INTEGER_RATIONAL: {
18078 pm_node_flags_t base = parser->integer_base;
18079 parser_lex(parser);
18080 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18081 }
18082 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18083 pm_node_flags_t base = parser->integer_base;
18084 parser_lex(parser);
18085 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18086 }
18087 case PM_TOKEN_KEYWORD___ENCODING__:
18088 parser_lex(parser);
18089 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18090 case PM_TOKEN_KEYWORD___FILE__:
18091 parser_lex(parser);
18092 return UP(pm_source_file_node_create(parser, &parser->previous));
18093 case PM_TOKEN_KEYWORD___LINE__:
18094 parser_lex(parser);
18095 return UP(pm_source_line_node_create(parser, &parser->previous));
18096 case PM_TOKEN_KEYWORD_ALIAS: {
18097 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18098 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18099 }
18100
18101 parser_lex(parser);
18102 pm_token_t keyword = parser->previous;
18103
18104 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18105 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18106
18107 switch (PM_NODE_TYPE(new_name)) {
18108 case PM_BACK_REFERENCE_READ_NODE:
18109 case PM_NUMBERED_REFERENCE_READ_NODE:
18110 case PM_GLOBAL_VARIABLE_READ_NODE: {
18111 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18112 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18113 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18114 }
18115 } else {
18116 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18117 }
18118
18119 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18120 }
18121 case PM_SYMBOL_NODE:
18122 case PM_INTERPOLATED_SYMBOL_NODE: {
18123 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18124 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18125 }
18126 }
18128 default:
18129 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18130 }
18131 }
18132 case PM_TOKEN_KEYWORD_CASE: {
18133 size_t opening_newline_index = token_newline_index(parser);
18134 parser_lex(parser);
18135
18136 pm_token_t case_keyword = parser->previous;
18137 pm_node_t *predicate = NULL;
18138
18139 pm_node_list_t current_block_exits = { 0 };
18140 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18141
18142 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18143 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18144 predicate = NULL;
18145 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18146 predicate = NULL;
18147 } else if (!token_begins_expression_p(parser->current.type)) {
18148 predicate = NULL;
18149 } else {
18150 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18151 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18152 }
18153
18154 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18155 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18156 parser_lex(parser);
18157 pop_block_exits(parser, previous_block_exits);
18158 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18159 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18160 }
18161
18162 // At this point we can create a case node, though we don't yet know
18163 // if it is a case-in or case-when node.
18164 pm_node_t *node;
18165
18166 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18167 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
18168 pm_static_literals_t literals = { 0 };
18169
18170 // At this point we've seen a when keyword, so we know this is a
18171 // case-when node. We will continue to parse the when nodes
18172 // until we hit the end of the list.
18173 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18174 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18175 parser_lex(parser);
18176
18177 pm_token_t when_keyword = parser->previous;
18178 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18179
18180 do {
18181 if (accept1(parser, PM_TOKEN_USTAR)) {
18182 pm_token_t operator = parser->previous;
18183 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18184
18185 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18186 pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
18187
18188 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18189 } else {
18190 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18191 pm_when_node_conditions_append(parser->arena, when_node, condition);
18192
18193 // If we found a missing node, then this is a syntax
18194 // error and we should stop looping.
18195 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18196
18197 // If this is a string node, then we need to mark it
18198 // as frozen because when clause strings are frozen.
18199 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18200 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18201 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18202 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18203 }
18204
18205 pm_when_clause_static_literals_add(parser, &literals, condition);
18206 }
18207 } while (accept1(parser, PM_TOKEN_COMMA));
18208
18209 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18210 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18211 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18212 }
18213 } else {
18214 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18215 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18216 }
18217
18218 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18219 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18220 if (statements != NULL) {
18221 pm_when_node_statements_set(when_node, statements);
18222 }
18223 }
18224
18225 pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
18226 }
18227
18228 // If we didn't parse any conditions (in or when) then we need
18229 // to indicate that we have an error.
18230 if (case_node->conditions.size == 0) {
18231 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18232 }
18233
18234 pm_static_literals_free(&literals);
18235 node = UP(case_node);
18236 } else {
18237 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
18238
18239 // If this is a case-match node (i.e., it is a pattern matching
18240 // case statement) then we must have a predicate.
18241 if (predicate == NULL) {
18242 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18243 }
18244
18245 // At this point we expect that we're parsing a case-in node. We
18246 // will continue to parse the in nodes until we hit the end of
18247 // the list.
18248 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18249 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18250
18251 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18252 parser->pattern_matching_newlines = true;
18253
18254 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18255 parser->command_start = false;
18256 parser_lex(parser);
18257
18258 pm_token_t in_keyword = parser->previous;
18259
18260 pm_constant_id_list_t captures = { 0 };
18261 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18262
18263 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18264
18265 // Since we're in the top-level of the case-in node we need
18266 // to check for guard clauses in the form of `if` or
18267 // `unless` statements.
18268 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18269 pm_token_t keyword = parser->previous;
18270 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18271 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18272 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18273 pm_token_t keyword = parser->previous;
18274 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18275 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18276 }
18277
18278 // Now we need to check for the terminator of the in node's
18279 // pattern. It can be a newline or semicolon optionally
18280 // followed by a `then` keyword.
18281 pm_token_t then_keyword = { 0 };
18282 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18283 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18284 then_keyword = parser->previous;
18285 }
18286 } else {
18287 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18288 then_keyword = parser->previous;
18289 }
18290
18291 // Now we can actually parse the statements associated with
18292 // the in node.
18293 pm_statements_node_t *statements;
18294 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18295 statements = NULL;
18296 } else {
18297 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18298 }
18299
18300 // Now that we have the full pattern and statements, we can
18301 // create the node and attach it to the case node.
18302 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
18303 pm_case_match_node_condition_append(parser->arena, case_node, condition);
18304 }
18305
18306 // If we didn't parse any conditions (in or when) then we need
18307 // to indicate that we have an error.
18308 if (case_node->conditions.size == 0) {
18309 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18310 }
18311
18312 node = UP(case_node);
18313 }
18314
18315 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18316 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18317 pm_token_t else_keyword = parser->previous;
18318 pm_else_node_t *else_node;
18319
18320 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18321 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18322 } else {
18323 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18324 }
18325
18326 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18327 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18328 } else {
18329 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18330 }
18331 }
18332
18333 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18334 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18335
18336 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18337 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
18338 } else {
18339 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
18340 }
18341
18342 pop_block_exits(parser, previous_block_exits);
18343 return node;
18344 }
18345 case PM_TOKEN_KEYWORD_BEGIN: {
18346 size_t opening_newline_index = token_newline_index(parser);
18347 parser_lex(parser);
18348
18349 pm_token_t begin_keyword = parser->previous;
18350 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18351
18352 pm_node_list_t current_block_exits = { 0 };
18353 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18354 pm_statements_node_t *begin_statements = NULL;
18355
18356 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18357 pm_accepts_block_stack_push(parser, true);
18358 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18359 pm_accepts_block_stack_pop(parser);
18360 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18361 }
18362
18363 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18364 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18365 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18366
18367 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
18368 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
18369 pop_block_exits(parser, previous_block_exits);
18370 return UP(begin_node);
18371 }
18372 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18373 pm_node_list_t current_block_exits = { 0 };
18374 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18375
18376 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18377 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18378 }
18379
18380 parser_lex(parser);
18381 pm_token_t keyword = parser->previous;
18382
18383 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18384 pm_token_t opening = parser->previous;
18385 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18386
18387 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
18388 pm_context_t context = parser->current_context->context;
18389 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18390 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18391 }
18392
18393 flush_block_exits(parser, previous_block_exits);
18394 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18395 }
18396 case PM_TOKEN_KEYWORD_BREAK:
18397 case PM_TOKEN_KEYWORD_NEXT:
18398 case PM_TOKEN_KEYWORD_RETURN: {
18399 parser_lex(parser);
18400
18401 pm_token_t keyword = parser->previous;
18402 pm_arguments_t arguments = { 0 };
18403
18404 if (
18405 token_begins_expression_p(parser->current.type) ||
18406 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18407 ) {
18408 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18409
18410 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18411 pm_token_t next = parser->current;
18412 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
18413
18414 // Reject `foo && return bar`.
18415 if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
18416 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18417 }
18418 }
18419 }
18420
18421 switch (keyword.type) {
18422 case PM_TOKEN_KEYWORD_BREAK: {
18423 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18424 if (!parser->partial_script) parse_block_exit(parser, node);
18425 return node;
18426 }
18427 case PM_TOKEN_KEYWORD_NEXT: {
18428 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18429 if (!parser->partial_script) parse_block_exit(parser, node);
18430 return node;
18431 }
18432 case PM_TOKEN_KEYWORD_RETURN: {
18433 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18434 parse_return(parser, node);
18435 return node;
18436 }
18437 default:
18438 assert(false && "unreachable");
18439 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
18440 }
18441 }
18442 case PM_TOKEN_KEYWORD_SUPER: {
18443 parser_lex(parser);
18444
18445 pm_token_t keyword = parser->previous;
18446 pm_arguments_t arguments = { 0 };
18447 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
18448
18449 if (
18450 arguments.opening_loc.length == 0 &&
18451 arguments.arguments == NULL &&
18452 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18453 ) {
18454 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18455 }
18456
18457 return UP(pm_super_node_create(parser, &keyword, &arguments));
18458 }
18459 case PM_TOKEN_KEYWORD_YIELD: {
18460 parser_lex(parser);
18461
18462 pm_token_t keyword = parser->previous;
18463 pm_arguments_t arguments = { 0 };
18464 parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
18465
18466 // It's possible that we've parsed a block argument through our
18467 // call to parse_arguments_list. If we found one, we should mark it
18468 // as invalid and destroy it, as we don't have a place for it on the
18469 // yield node.
18470 if (arguments.block != NULL) {
18471 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18472 pm_node_unreference(parser, arguments.block);
18473 arguments.block = NULL;
18474 }
18475
18476 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18477 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18478
18479 return node;
18480 }
18481 case PM_TOKEN_KEYWORD_CLASS: {
18482 size_t opening_newline_index = token_newline_index(parser);
18483 parser_lex(parser);
18484
18485 pm_token_t class_keyword = parser->previous;
18486 pm_do_loop_stack_push(parser, false);
18487
18488 pm_node_list_t current_block_exits = { 0 };
18489 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18490
18491 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18492 pm_token_t operator = parser->previous;
18493 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18494
18495 pm_parser_scope_push(parser, true);
18496 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18497 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18498 }
18499
18500 pm_node_t *statements = NULL;
18501 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18502 pm_accepts_block_stack_push(parser, true);
18503 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18504 pm_accepts_block_stack_pop(parser);
18505 }
18506
18507 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18508 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18509 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18510 } else {
18511 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18512 }
18513
18514 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18515
18516 pm_constant_id_list_t locals;
18517 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18518
18519 pm_parser_scope_pop(parser);
18520 pm_do_loop_stack_pop(parser);
18521
18522 flush_block_exits(parser, previous_block_exits);
18523 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18524 }
18525
18526 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18527 pm_token_t name = parser->previous;
18528 if (name.type != PM_TOKEN_CONSTANT) {
18529 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18530 }
18531
18532 pm_token_t inheritance_operator = { 0 };
18533 pm_node_t *superclass;
18534
18535 if (match1(parser, PM_TOKEN_LESS)) {
18536 inheritance_operator = parser->current;
18537 lex_state_set(parser, PM_LEX_STATE_BEG);
18538
18539 parser->command_start = true;
18540 parser_lex(parser);
18541
18542 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18543 } else {
18544 superclass = NULL;
18545 }
18546
18547 pm_parser_scope_push(parser, true);
18548
18549 if (inheritance_operator.start != NULL) {
18550 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18551 } else {
18552 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18553 }
18554 pm_node_t *statements = NULL;
18555
18556 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18557 pm_accepts_block_stack_push(parser, true);
18558 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18559 pm_accepts_block_stack_pop(parser);
18560 }
18561
18562 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18563 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18564 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18565 } else {
18566 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18567 }
18568
18569 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18570
18571 if (context_def_p(parser)) {
18572 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18573 }
18574
18575 pm_constant_id_list_t locals;
18576 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18577
18578 pm_parser_scope_pop(parser);
18579 pm_do_loop_stack_pop(parser);
18580
18581 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18582 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18583 }
18584
18585 pop_block_exits(parser, previous_block_exits);
18586 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18587 }
18588 case PM_TOKEN_KEYWORD_DEF: {
18589 pm_node_list_t current_block_exits = { 0 };
18590 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18591
18592 pm_token_t def_keyword = parser->current;
18593 size_t opening_newline_index = token_newline_index(parser);
18594
18595 pm_node_t *receiver = NULL;
18596 pm_token_t operator = { 0 };
18597 pm_token_t name;
18598
18599 // This context is necessary for lexing `...` in bare params
18600 // correctly. It must be pushed before lexing the first param, so it
18601 // is pushed here.
18602 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18603 parser_lex(parser);
18604
18605 // This will be false if the method name is not a valid identifier
18606 // but could be followed by an operator.
18607 bool valid_name = true;
18608
18609 switch (parser->current.type) {
18610 case PM_CASE_OPERATOR:
18611 pm_parser_scope_push(parser, true);
18612 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18613 parser_lex(parser);
18614
18615 name = parser->previous;
18616 break;
18617 case PM_TOKEN_IDENTIFIER: {
18618 parser_lex(parser);
18619
18620 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18621 receiver = parse_variable_call(parser);
18622
18623 pm_parser_scope_push(parser, true);
18624 lex_state_set(parser, PM_LEX_STATE_FNAME);
18625 parser_lex(parser);
18626
18627 operator = parser->previous;
18628 name = parse_method_definition_name(parser);
18629 } else {
18630 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18631 pm_parser_scope_push(parser, true);
18632
18633 name = parser->previous;
18634 }
18635
18636 break;
18637 }
18638 case PM_TOKEN_INSTANCE_VARIABLE:
18639 case PM_TOKEN_CLASS_VARIABLE:
18640 case PM_TOKEN_GLOBAL_VARIABLE:
18641 valid_name = false;
18643 case PM_TOKEN_CONSTANT:
18644 case PM_TOKEN_KEYWORD_NIL:
18645 case PM_TOKEN_KEYWORD_SELF:
18646 case PM_TOKEN_KEYWORD_TRUE:
18647 case PM_TOKEN_KEYWORD_FALSE:
18648 case PM_TOKEN_KEYWORD___FILE__:
18649 case PM_TOKEN_KEYWORD___LINE__:
18650 case PM_TOKEN_KEYWORD___ENCODING__: {
18651 pm_parser_scope_push(parser, true);
18652 parser_lex(parser);
18653
18654 pm_token_t identifier = parser->previous;
18655
18656 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18657 lex_state_set(parser, PM_LEX_STATE_FNAME);
18658 parser_lex(parser);
18659 operator = parser->previous;
18660
18661 switch (identifier.type) {
18662 case PM_TOKEN_CONSTANT:
18663 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18664 break;
18665 case PM_TOKEN_INSTANCE_VARIABLE:
18666 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18667 break;
18668 case PM_TOKEN_CLASS_VARIABLE:
18669 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18670 break;
18671 case PM_TOKEN_GLOBAL_VARIABLE:
18672 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18673 break;
18674 case PM_TOKEN_KEYWORD_NIL:
18675 receiver = UP(pm_nil_node_create(parser, &identifier));
18676 break;
18677 case PM_TOKEN_KEYWORD_SELF:
18678 receiver = UP(pm_self_node_create(parser, &identifier));
18679 break;
18680 case PM_TOKEN_KEYWORD_TRUE:
18681 receiver = UP(pm_true_node_create(parser, &identifier));
18682 break;
18683 case PM_TOKEN_KEYWORD_FALSE:
18684 receiver = UP(pm_false_node_create(parser, &identifier));
18685 break;
18686 case PM_TOKEN_KEYWORD___FILE__:
18687 receiver = UP(pm_source_file_node_create(parser, &identifier));
18688 break;
18689 case PM_TOKEN_KEYWORD___LINE__:
18690 receiver = UP(pm_source_line_node_create(parser, &identifier));
18691 break;
18692 case PM_TOKEN_KEYWORD___ENCODING__:
18693 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18694 break;
18695 default:
18696 break;
18697 }
18698
18699 name = parse_method_definition_name(parser);
18700 } else {
18701 if (!valid_name) {
18702 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18703 }
18704
18705 name = identifier;
18706 }
18707 break;
18708 }
18709 case PM_TOKEN_PARENTHESIS_LEFT: {
18710 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18711 // the inner expression of this parenthesis should not be
18712 // processed under this context. Thus, the context is popped
18713 // here.
18714 context_pop(parser);
18715 parser_lex(parser);
18716
18717 pm_token_t lparen = parser->previous;
18718 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18719
18720 accept1(parser, PM_TOKEN_NEWLINE);
18721 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18722 pm_token_t rparen = parser->previous;
18723
18724 lex_state_set(parser, PM_LEX_STATE_FNAME);
18725 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18726
18727 operator = parser->previous;
18728 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18729
18730 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
18731 // reason as described above.
18732 pm_parser_scope_push(parser, true);
18733 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18734 name = parse_method_definition_name(parser);
18735 break;
18736 }
18737 default:
18738 pm_parser_scope_push(parser, true);
18739 name = parse_method_definition_name(parser);
18740 break;
18741 }
18742
18743 pm_token_t lparen = { 0 };
18744 pm_token_t rparen = { 0 };
18745 pm_parameters_node_t *params;
18746
18747 bool accept_endless_def = true;
18748 switch (parser->current.type) {
18749 case PM_TOKEN_PARENTHESIS_LEFT: {
18750 parser_lex(parser);
18751 lparen = parser->previous;
18752
18753 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18754 params = NULL;
18755 } else {
18756 // https://bugs.ruby-lang.org/issues/19107
18757 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18758 params = parse_parameters(
18759 parser,
18760 PM_BINDING_POWER_DEFINED,
18761 true,
18762 allow_trailing_comma,
18763 true,
18764 true,
18765 false,
18766 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18767 (uint16_t) (depth + 1)
18768 );
18769 }
18770
18771 lex_state_set(parser, PM_LEX_STATE_BEG);
18772 parser->command_start = true;
18773
18774 context_pop(parser);
18775 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18776 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18777 parser->previous.start = parser->previous.end;
18778 parser->previous.type = 0;
18779 }
18780
18781 rparen = parser->previous;
18782 break;
18783 }
18784 case PM_CASE_PARAMETER: {
18785 // If we're about to lex a label, we need to add the label
18786 // state to make sure the next newline is ignored.
18787 if (parser->current.type == PM_TOKEN_LABEL) {
18788 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18789 }
18790
18791 params = parse_parameters(
18792 parser,
18793 PM_BINDING_POWER_DEFINED,
18794 false,
18795 false,
18796 true,
18797 true,
18798 false,
18799 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18800 (uint16_t) (depth + 1)
18801 );
18802
18803 // Reject `def * = 1` and similar. We have to specifically check
18804 // for them because they create ambiguity with optional arguments.
18805 accept_endless_def = false;
18806
18807 context_pop(parser);
18808 break;
18809 }
18810 default: {
18811 params = NULL;
18812 context_pop(parser);
18813 break;
18814 }
18815 }
18816
18817 pm_node_t *statements = NULL;
18818 pm_token_t equal = { 0 };
18819 pm_token_t end_keyword = { 0 };
18820
18821 if (accept1(parser, PM_TOKEN_EQUAL)) {
18822 if (token_is_setter_name(&name)) {
18823 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18824 }
18825 if (!accept_endless_def) {
18826 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18827 }
18828 if (
18831 ) {
18832 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18833 }
18834 equal = parser->previous;
18835
18836 context_push(parser, PM_CONTEXT_DEF);
18837 pm_do_loop_stack_push(parser, false);
18838 statements = UP(pm_statements_node_create(parser));
18839
18840 uint8_t allow_flags;
18841 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18842 allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
18843 } else {
18844 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18845 allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
18846 }
18847
18848 // Inside a def body, we push true onto the
18849 // accepts_block_stack so that `do` is lexed as
18850 // PM_TOKEN_KEYWORD_DO (which can only start a block for
18851 // primary-level constructs, not commands). During command
18852 // argument parsing, the stack is pushed to false, causing
18853 // `do` to be lexed as PM_TOKEN_KEYWORD_DO_BLOCK, which
18854 // is not consumed inside the endless def body and instead
18855 // left for the outer context.
18856 pm_accepts_block_stack_push(parser, true);
18857 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18858 pm_accepts_block_stack_pop(parser);
18859
18860 // If an unconsumed PM_TOKEN_KEYWORD_DO follows the body,
18861 // it is an error (e.g., `def f = 1 do end`).
18862 // PM_TOKEN_KEYWORD_DO_BLOCK is intentionally not caught
18863 // here — it should bubble up to the outer context (e.g.,
18864 // `private def f = puts "Hello" do end` where the block
18865 // attaches to `private`).
18866 if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
18867 pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
18868 pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
18869 }
18870
18871 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18872 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18873
18874 pm_token_t rescue_keyword = parser->previous;
18875
18876 // In the Ruby grammar, the rescue value of an endless
18877 // method command excludes and/or and in/=>.
18878 pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18879 context_pop(parser);
18880
18881 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18882 }
18883
18884 // A nested endless def whose body is a command call (e.g.,
18885 // `def f = def g = foo bar`) is a command assignment and
18886 // cannot appear as a def body.
18887 if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
18888 PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18889 }
18890
18891 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18892 pm_do_loop_stack_pop(parser);
18893 context_pop(parser);
18894 } else {
18895 if (lparen.start == NULL) {
18896 lex_state_set(parser, PM_LEX_STATE_BEG);
18897 parser->command_start = true;
18898 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18899 } else {
18900 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18901 }
18902
18903 pm_accepts_block_stack_push(parser, true);
18904 pm_do_loop_stack_push(parser, false);
18905
18906 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18907 pm_accepts_block_stack_push(parser, true);
18908 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18909 pm_accepts_block_stack_pop(parser);
18910 }
18911
18912 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18913 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18914 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18915 } else {
18916 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18917 }
18918
18919 pm_accepts_block_stack_pop(parser);
18920 pm_do_loop_stack_pop(parser);
18921
18922 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18923 end_keyword = parser->previous;
18924 }
18925
18926 pm_constant_id_list_t locals;
18927 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18928 pm_parser_scope_pop(parser);
18929
18935 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
18936
18937 flush_block_exits(parser, previous_block_exits);
18938
18939 return UP(pm_def_node_create(
18940 parser,
18941 name_id,
18942 &name,
18943 receiver,
18944 params,
18945 statements,
18946 &locals,
18947 &def_keyword,
18948 NTOK2PTR(operator),
18949 NTOK2PTR(lparen),
18950 NTOK2PTR(rparen),
18951 NTOK2PTR(equal),
18952 NTOK2PTR(end_keyword)
18953 ));
18954 }
18955 case PM_TOKEN_KEYWORD_DEFINED: {
18956 parser_lex(parser);
18957
18958 pm_token_t keyword = parser->previous;
18959 pm_token_t lparen = { 0 };
18960 pm_token_t rparen = { 0 };
18961 pm_node_t *expression;
18962
18963 context_push(parser, PM_CONTEXT_DEFINED);
18964 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
18965
18966 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
18967 lparen = parser->previous;
18968
18969 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18970 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
18971 lparen = (pm_token_t) { 0 };
18972 } else {
18973 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
18974
18975 if (!parser->recovering) {
18976 accept1(parser, PM_TOKEN_NEWLINE);
18977 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18978 rparen = parser->previous;
18979 }
18980 }
18981 } else {
18982 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
18983 }
18984
18985 context_pop(parser);
18986 return UP(pm_defined_node_create(
18987 parser,
18988 NTOK2PTR(lparen),
18989 expression,
18990 NTOK2PTR(rparen),
18991 &keyword
18992 ));
18993 }
18994 case PM_TOKEN_KEYWORD_END_UPCASE: {
18995 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18996 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
18997 }
18998
18999 parser_lex(parser);
19000 pm_token_t keyword = parser->previous;
19001
19002 if (context_def_p(parser)) {
19003 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19004 }
19005
19006 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19007 pm_token_t opening = parser->previous;
19008 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19009
19010 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19011 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19012 }
19013 case PM_TOKEN_KEYWORD_FALSE:
19014 parser_lex(parser);
19015 return UP(pm_false_node_create(parser, &parser->previous));
19016 case PM_TOKEN_KEYWORD_FOR: {
19017 size_t opening_newline_index = token_newline_index(parser);
19018 parser_lex(parser);
19019
19020 pm_token_t for_keyword = parser->previous;
19021 pm_node_t *index;
19022
19023 context_push(parser, PM_CONTEXT_FOR_INDEX);
19024
19025 // First, parse out the first index expression.
19026 if (accept1(parser, PM_TOKEN_USTAR)) {
19027 pm_token_t star_operator = parser->previous;
19028 pm_node_t *name = NULL;
19029
19030 if (token_begins_expression_p(parser->current.type)) {
19031 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19032 }
19033
19034 index = UP(pm_splat_node_create(parser, &star_operator, name));
19035 } else if (token_begins_expression_p(parser->current.type)) {
19036 index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19037 } else {
19038 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19039 index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19040 }
19041
19042 // Now, if there are multiple index expressions, parse them out.
19043 if (match1(parser, PM_TOKEN_COMMA)) {
19044 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19045 } else {
19046 index = parse_target(parser, index, false, false);
19047 }
19048
19049 context_pop(parser);
19050 pm_do_loop_stack_push(parser, true);
19051
19052 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19053 pm_token_t in_keyword = parser->previous;
19054
19055 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19056 pm_do_loop_stack_pop(parser);
19057
19058 pm_token_t do_keyword = { 0 };
19059 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19060 do_keyword = parser->previous;
19061 } else {
19062 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19063 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19064 }
19065 }
19066
19067 pm_statements_node_t *statements = NULL;
19068 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19069 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19070 }
19071
19072 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19073 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19074
19075 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19076 }
19077 case PM_TOKEN_KEYWORD_IF:
19078 if (parser_end_of_line_p(parser)) {
19079 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19080 }
19081
19082 size_t opening_newline_index = token_newline_index(parser);
19083 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19084 parser_lex(parser);
19085
19086 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19087 case PM_TOKEN_KEYWORD_UNDEF: {
19088 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19089 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19090 }
19091
19092 parser_lex(parser);
19093 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19094 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19095
19096 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19097 } else {
19098 pm_undef_node_append(parser->arena, undef, name);
19099
19100 while (match1(parser, PM_TOKEN_COMMA)) {
19101 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19102 parser_lex(parser);
19103 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19104
19105 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19106 break;
19107 }
19108
19109 pm_undef_node_append(parser->arena, undef, name);
19110 }
19111 }
19112
19113 return UP(undef);
19114 }
19115 case PM_TOKEN_KEYWORD_NOT: {
19116 parser_lex(parser);
19117
19118 pm_token_t message = parser->previous;
19119 pm_arguments_t arguments = { 0 };
19120 pm_node_t *receiver = NULL;
19121
19122 // The `not` keyword without parentheses is only valid in contexts
19123 // where it would be parsed as an expression (i.e., at or below
19124 // the `not` binding power level). In other contexts (e.g., method
19125 // arguments, array elements, assignment right-hand sides),
19126 // parentheses are required: `not(x)`. An exception is made for
19127 // endless def bodies, where `not` is valid as both `arg` and
19128 // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
19129 if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19130 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19131 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19132 } else {
19133 accept1(parser, PM_TOKEN_NEWLINE);
19134 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19135 }
19136
19137 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19138 }
19139
19140 accept1(parser, PM_TOKEN_NEWLINE);
19141
19142 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19143 pm_token_t lparen = parser->previous;
19144
19145 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19146 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19147 } else {
19148 arguments.opening_loc = TOK2LOC(parser, &lparen);
19149 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19150
19151 if (!parser->recovering) {
19152 accept1(parser, PM_TOKEN_NEWLINE);
19153 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19154 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19155 }
19156 }
19157 } else {
19158 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19159 }
19160
19161 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19162 }
19163 case PM_TOKEN_KEYWORD_UNLESS: {
19164 size_t opening_newline_index = token_newline_index(parser);
19165 parser_lex(parser);
19166
19167 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19168 }
19169 case PM_TOKEN_KEYWORD_MODULE: {
19170 pm_node_list_t current_block_exits = { 0 };
19171 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19172
19173 size_t opening_newline_index = token_newline_index(parser);
19174 parser_lex(parser);
19175 pm_token_t module_keyword = parser->previous;
19176
19177 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19178 pm_token_t name;
19179
19180 // If we can recover from a syntax error that occurred while parsing
19181 // the name of the module, then we'll handle that here.
19182 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19183 pop_block_exits(parser, previous_block_exits);
19184
19185 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19186 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19187 }
19188
19189 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19190 pm_token_t double_colon = parser->previous;
19191
19192 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19193 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19194 }
19195
19196 // Here we retrieve the name of the module. If it wasn't a constant,
19197 // then it's possible that `module foo` was passed, which is a
19198 // syntax error. We handle that here as well.
19199 name = parser->previous;
19200 if (name.type != PM_TOKEN_CONSTANT) {
19201 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19202 }
19203
19204 pm_parser_scope_push(parser, true);
19205 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19206 pm_node_t *statements = NULL;
19207
19208 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19209 pm_accepts_block_stack_push(parser, true);
19210 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19211 pm_accepts_block_stack_pop(parser);
19212 }
19213
19214 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19215 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19216 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19217 } else {
19218 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19219 }
19220
19221 pm_constant_id_list_t locals;
19222 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19223
19224 pm_parser_scope_pop(parser);
19225 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19226
19227 if (context_def_p(parser)) {
19228 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19229 }
19230
19231 pop_block_exits(parser, previous_block_exits);
19232
19233 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19234 }
19235 case PM_TOKEN_KEYWORD_NIL:
19236 parser_lex(parser);
19237 return UP(pm_nil_node_create(parser, &parser->previous));
19238 case PM_TOKEN_KEYWORD_REDO: {
19239 parser_lex(parser);
19240
19241 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19242 if (!parser->partial_script) parse_block_exit(parser, node);
19243
19244 return node;
19245 }
19246 case PM_TOKEN_KEYWORD_RETRY: {
19247 parser_lex(parser);
19248
19249 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19250 parse_retry(parser, node);
19251
19252 return node;
19253 }
19254 case PM_TOKEN_KEYWORD_SELF:
19255 parser_lex(parser);
19256 return UP(pm_self_node_create(parser, &parser->previous));
19257 case PM_TOKEN_KEYWORD_TRUE:
19258 parser_lex(parser);
19259 return UP(pm_true_node_create(parser, &parser->previous));
19260 case PM_TOKEN_KEYWORD_UNTIL: {
19261 size_t opening_newline_index = token_newline_index(parser);
19262
19263 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19264 pm_do_loop_stack_push(parser, true);
19265
19266 parser_lex(parser);
19267 pm_token_t keyword = parser->previous;
19268 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19269
19270 pm_do_loop_stack_pop(parser);
19271 context_pop(parser);
19272
19273 pm_token_t do_keyword = { 0 };
19274 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19275 do_keyword = parser->previous;
19276 } else {
19277 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19278 }
19279
19280 pm_statements_node_t *statements = NULL;
19281 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19282 pm_accepts_block_stack_push(parser, true);
19283 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19284 pm_accepts_block_stack_pop(parser);
19285 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19286 }
19287
19288 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19289 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19290
19291 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19292 }
19293 case PM_TOKEN_KEYWORD_WHILE: {
19294 size_t opening_newline_index = token_newline_index(parser);
19295
19296 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19297 pm_do_loop_stack_push(parser, true);
19298
19299 parser_lex(parser);
19300 pm_token_t keyword = parser->previous;
19301 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19302
19303 pm_do_loop_stack_pop(parser);
19304 context_pop(parser);
19305
19306 pm_token_t do_keyword = { 0 };
19307 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19308 do_keyword = parser->previous;
19309 } else {
19310 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19311 }
19312
19313 pm_statements_node_t *statements = NULL;
19314 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19315 pm_accepts_block_stack_push(parser, true);
19316 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19317 pm_accepts_block_stack_pop(parser);
19318 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19319 }
19320
19321 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19322 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19323
19324 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19325 }
19326 case PM_TOKEN_PERCENT_LOWER_I: {
19327 parser_lex(parser);
19328 pm_token_t opening = parser->previous;
19329 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19330 pm_node_t *current = NULL;
19331
19332 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19333 accept1(parser, PM_TOKEN_WORDS_SEP);
19334 if (match1(parser, PM_TOKEN_STRING_END)) break;
19335
19336 // Interpolation is not possible but nested heredocs can still lead to
19337 // consecutive (disjoint) string tokens when the final newline is escaped.
19338 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19339 // Record the string node, moving to interpolation if needed.
19340 if (current == NULL) {
19341 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19342 parser_lex(parser);
19343 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19344 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19345 parser_lex(parser);
19346 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
19347 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19348 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19349 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
19350 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
19351 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
19352 parser_lex(parser);
19353
19354 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19355 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
19356 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
19357
19358 // current is arena-allocated so no explicit free is needed.
19359 current = UP(interpolated);
19360 } else {
19361 assert(false && "unreachable");
19362 }
19363 }
19364
19365 if (current) {
19366 pm_array_node_elements_append(parser->arena, array, current);
19367 current = NULL;
19368 } else {
19369 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19370 }
19371 }
19372
19373 pm_token_t closing = parser->current;
19374 if (match1(parser, PM_TOKEN_EOF)) {
19375 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19376 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19377 } else {
19378 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19379 }
19380 pm_array_node_close_set(parser, array, &closing);
19381
19382 return UP(array);
19383 }
19384 case PM_TOKEN_PERCENT_UPPER_I: {
19385 parser_lex(parser);
19386 pm_token_t opening = parser->previous;
19387 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19388
19389 // This is the current node that we are parsing that will be added to the
19390 // list of elements.
19391 pm_node_t *current = NULL;
19392
19393 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19394 switch (parser->current.type) {
19395 case PM_TOKEN_WORDS_SEP: {
19396 if (current == NULL) {
19397 // If we hit a separator before we have any content, then we don't
19398 // need to do anything.
19399 } else {
19400 // If we hit a separator after we've hit content, then we need to
19401 // append that content to the list and reset the current node.
19402 pm_array_node_elements_append(parser->arena, array, current);
19403 current = NULL;
19404 }
19405
19406 parser_lex(parser);
19407 break;
19408 }
19409 case PM_TOKEN_STRING_CONTENT: {
19410 if (current == NULL) {
19411 // If we hit content and the current node is NULL, then this is
19412 // the first string content we've seen. In that case we're going
19413 // to create a new string node and set that to the current.
19414 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19415 parser_lex(parser);
19416 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19417 // If we hit string content and the current node is an
19418 // interpolated string, then we need to append the string content
19419 // to the list of child nodes.
19420 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19421 parser_lex(parser);
19422
19423 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
19424 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19425 // If we hit string content and the current node is a symbol node,
19426 // then we need to convert the current node into an interpolated
19427 // string and add the string content to the list of child nodes.
19428 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19429 pm_token_t content = {
19430 .type = PM_TOKEN_STRING_CONTENT,
19431 .start = parser->start + cast->value_loc.start,
19432 .end = parser->start + cast->value_loc.start + cast->value_loc.length
19433 };
19434
19435 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
19436 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
19437 parser_lex(parser);
19438
19439 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19440 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
19441 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
19442
19443 // current is arena-allocated so no explicit free is needed.
19444 current = UP(interpolated);
19445 } else {
19446 assert(false && "unreachable");
19447 }
19448
19449 break;
19450 }
19451 case PM_TOKEN_EMBVAR: {
19452 bool start_location_set = false;
19453 if (current == NULL) {
19454 // If we hit an embedded variable and the current node is NULL,
19455 // then this is the start of a new string. We'll set the current
19456 // node to a new interpolated string.
19457 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19458 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19459 // If we hit an embedded variable and the current node is a string
19460 // node, then we'll convert the current into an interpolated
19461 // string and add the string node to the list of parts.
19462 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19463
19464 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19465 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
19466 PM_NODE_START_SET_NODE(interpolated, current);
19467 start_location_set = true;
19468 current = UP(interpolated);
19469 } else {
19470 // If we hit an embedded variable and the current node is an
19471 // interpolated string, then we'll just add the embedded variable.
19472 }
19473
19474 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19475 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
19476 if (!start_location_set) {
19477 PM_NODE_START_SET_NODE(current, part);
19478 }
19479 break;
19480 }
19481 case PM_TOKEN_EMBEXPR_BEGIN: {
19482 bool start_location_set = false;
19483 if (current == NULL) {
19484 // If we hit an embedded expression and the current node is NULL,
19485 // then this is the start of a new string. We'll set the current
19486 // node to a new interpolated string.
19487 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19488 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19489 // If we hit an embedded expression and the current node is a
19490 // string node, then we'll convert the current into an
19491 // interpolated string and add the string node to the list of
19492 // parts.
19493 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19494
19495 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19496 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
19497 PM_NODE_START_SET_NODE(interpolated, current);
19498 start_location_set = true;
19499 current = UP(interpolated);
19500 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19501 // If we hit an embedded expression and the current node is an
19502 // interpolated string, then we'll just continue on.
19503 } else {
19504 assert(false && "unreachable");
19505 }
19506
19507 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19508 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
19509 if (!start_location_set) {
19510 PM_NODE_START_SET_NODE(current, part);
19511 }
19512 break;
19513 }
19514 default:
19515 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19516 parser_lex(parser);
19517 break;
19518 }
19519 }
19520
19521 // If we have a current node, then we need to append it to the list.
19522 if (current) {
19523 pm_array_node_elements_append(parser->arena, array, current);
19524 }
19525
19526 pm_token_t closing = parser->current;
19527 if (match1(parser, PM_TOKEN_EOF)) {
19528 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19529 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19530 } else {
19531 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19532 }
19533 pm_array_node_close_set(parser, array, &closing);
19534
19535 return UP(array);
19536 }
19537 case PM_TOKEN_PERCENT_LOWER_W: {
19538 parser_lex(parser);
19539 pm_token_t opening = parser->previous;
19540 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19541 pm_node_t *current = NULL;
19542
19543 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19544 accept1(parser, PM_TOKEN_WORDS_SEP);
19545 if (match1(parser, PM_TOKEN_STRING_END)) break;
19546
19547 // Interpolation is not possible but nested heredocs can still lead to
19548 // consecutive (disjoint) string tokens when the final newline is escaped.
19549 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19550 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19551
19552 // Record the string node, moving to interpolation if needed.
19553 if (current == NULL) {
19554 current = string;
19555 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19556 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, string);
19557 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19558 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19559 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19560 pm_interpolated_string_node_append(parser->arena, interpolated, string);
19561 current = UP(interpolated);
19562 } else {
19563 assert(false && "unreachable");
19564 }
19565 parser_lex(parser);
19566 }
19567
19568 if (current) {
19569 pm_array_node_elements_append(parser->arena, array, current);
19570 current = NULL;
19571 } else {
19572 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19573 }
19574 }
19575
19576 pm_token_t closing = parser->current;
19577 if (match1(parser, PM_TOKEN_EOF)) {
19578 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19579 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19580 } else {
19581 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19582 }
19583
19584 pm_array_node_close_set(parser, array, &closing);
19585 return UP(array);
19586 }
19587 case PM_TOKEN_PERCENT_UPPER_W: {
19588 parser_lex(parser);
19589 pm_token_t opening = parser->previous;
19590 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19591
19592 // This is the current node that we are parsing that will be added
19593 // to the list of elements.
19594 pm_node_t *current = NULL;
19595
19596 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19597 switch (parser->current.type) {
19598 case PM_TOKEN_WORDS_SEP: {
19599 // Reset the explicit encoding if we hit a separator
19600 // since each element can have its own encoding.
19601 parser->explicit_encoding = NULL;
19602
19603 if (current == NULL) {
19604 // If we hit a separator before we have any content,
19605 // then we don't need to do anything.
19606 } else {
19607 // If we hit a separator after we've hit content,
19608 // then we need to append that content to the list
19609 // and reset the current node.
19610 pm_array_node_elements_append(parser->arena, array, current);
19611 current = NULL;
19612 }
19613
19614 parser_lex(parser);
19615 break;
19616 }
19617 case PM_TOKEN_STRING_CONTENT: {
19618 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19619 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19620 parser_lex(parser);
19621
19622 if (current == NULL) {
19623 // If we hit content and the current node is NULL,
19624 // then this is the first string content we've seen.
19625 // In that case we're going to create a new string
19626 // node and set that to the current.
19627 current = string;
19628 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19629 // If we hit string content and the current node is
19630 // an interpolated string, then we need to append
19631 // the string content to the list of child nodes.
19632 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, string);
19633 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19634 // If we hit string content and the current node is
19635 // a string node, then we need to convert the
19636 // current node into an interpolated string and add
19637 // the string content to the list of child nodes.
19638 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19639 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19640 pm_interpolated_string_node_append(parser->arena, interpolated, string);
19641 current = UP(interpolated);
19642 } else {
19643 assert(false && "unreachable");
19644 }
19645
19646 break;
19647 }
19648 case PM_TOKEN_EMBVAR: {
19649 if (current == NULL) {
19650 // If we hit an embedded variable and the current
19651 // node is NULL, then this is the start of a new
19652 // string. We'll set the current node to a new
19653 // interpolated string.
19654 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19655 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19656 // If we hit an embedded variable and the current
19657 // node is a string node, then we'll convert the
19658 // current into an interpolated string and add the
19659 // string node to the list of parts.
19660 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19661 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19662 current = UP(interpolated);
19663 } else {
19664 // If we hit an embedded variable and the current
19665 // node is an interpolated string, then we'll just
19666 // add the embedded variable.
19667 }
19668
19669 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19670 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, part);
19671 break;
19672 }
19673 case PM_TOKEN_EMBEXPR_BEGIN: {
19674 if (current == NULL) {
19675 // If we hit an embedded expression and the current
19676 // node is NULL, then this is the start of a new
19677 // string. We'll set the current node to a new
19678 // interpolated string.
19679 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19680 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19681 // If we hit an embedded expression and the current
19682 // node is a string node, then we'll convert the
19683 // current into an interpolated string and add the
19684 // string node to the list of parts.
19685 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19686 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19687 current = UP(interpolated);
19688 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19689 // If we hit an embedded expression and the current
19690 // node is an interpolated string, then we'll just
19691 // continue on.
19692 } else {
19693 assert(false && "unreachable");
19694 }
19695
19696 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19697 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, part);
19698 break;
19699 }
19700 default:
19701 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19702 parser_lex(parser);
19703 break;
19704 }
19705 }
19706
19707 // If we have a current node, then we need to append it to the list.
19708 if (current) {
19709 pm_array_node_elements_append(parser->arena, array, current);
19710 }
19711
19712 pm_token_t closing = parser->current;
19713 if (match1(parser, PM_TOKEN_EOF)) {
19714 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19715 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19716 } else {
19717 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19718 }
19719
19720 pm_array_node_close_set(parser, array, &closing);
19721 return UP(array);
19722 }
19723 case PM_TOKEN_REGEXP_BEGIN: {
19724 pm_token_t opening = parser->current;
19725 parser_lex(parser);
19726
19727 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19728 // If we get here, then we have an end immediately after a start. In
19729 // that case we'll create an empty content token and return an
19730 // uninterpolated regular expression.
19731 pm_token_t content = (pm_token_t) {
19732 .type = PM_TOKEN_STRING_CONTENT,
19733 .start = parser->previous.end,
19734 .end = parser->previous.end
19735 };
19736
19737 parser_lex(parser);
19738
19739 pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
19740 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
19741 return UP(node);
19742 }
19743
19745
19746 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19747 // In this case we've hit string content so we know the regular
19748 // expression at least has something in it. We'll need to check if the
19749 // following token is the end (in which case we can return a plain
19750 // regular expression) or if it's not then it has interpolation.
19751 pm_string_t unescaped = parser->current_string;
19752 pm_token_t content = parser->current;
19753 parser_lex(parser);
19754
19755 // If we hit an end, then we can create a regular expression
19756 // node without interpolation, which can be represented more
19757 // succinctly and more easily compiled.
19758 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19759 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19760
19761 // If we're not immediately followed by a =~, then we
19762 // parse and validate now. If it is followed by a =~,
19763 // then it will get parsed in the =~ handler where
19764 // named captures can also be extracted.
19765 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19766 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
19767 }
19768
19769 return UP(node);
19770 }
19771
19772 // If we get here, then we have interpolation so we'll need to create
19773 // a regular expression node with interpolation.
19774 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19775
19776 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19777 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19778 // This is extremely strange, but the first string part of a
19779 // regular expression will always be tagged as binary if we
19780 // are in a US-ASCII file, no matter its contents.
19781 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19782 }
19783
19784 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
19785 } else {
19786 // If the first part of the body of the regular expression is not a
19787 // string content, then we have interpolation and we need to create an
19788 // interpolated regular expression node.
19789 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19790 }
19791
19792 // Now that we're here and we have interpolation, we'll parse all of the
19793 // parts into the list.
19794 pm_node_t *part;
19795 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19796 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19797 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
19798 }
19799 }
19800
19801 pm_token_t closing = parser->current;
19802 if (match1(parser, PM_TOKEN_EOF)) {
19803 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19804 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19805 } else {
19806 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19807 }
19808
19809 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19810 return UP(interpolated);
19811 }
19812 case PM_TOKEN_BACKTICK:
19813 case PM_TOKEN_PERCENT_LOWER_X: {
19814 parser_lex(parser);
19815 pm_token_t opening = parser->previous;
19816
19817 // When we get here, we don't know if this string is going to have
19818 // interpolation or not, even though it is allowed. Still, we want to be
19819 // able to return a string node without interpolation if we can since
19820 // it'll be faster.
19821 if (match1(parser, PM_TOKEN_STRING_END)) {
19822 // If we get here, then we have an end immediately after a start. In
19823 // that case we'll create an empty content token and return an
19824 // uninterpolated string.
19825 pm_token_t content = (pm_token_t) {
19826 .type = PM_TOKEN_STRING_CONTENT,
19827 .start = parser->previous.end,
19828 .end = parser->previous.end
19829 };
19830
19831 parser_lex(parser);
19832 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19833 }
19834
19836
19837 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19838 // In this case we've hit string content so we know the string
19839 // at least has something in it. We'll need to check if the
19840 // following token is the end (in which case we can return a
19841 // plain string) or if it's not then it has interpolation.
19842 pm_string_t unescaped = parser->current_string;
19843 pm_token_t content = parser->current;
19844 parser_lex(parser);
19845
19846 if (match1(parser, PM_TOKEN_STRING_END)) {
19847 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19848 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19849 parser_lex(parser);
19850 return node;
19851 }
19852
19853 // If we get here, then we have interpolation so we'll need to
19854 // create a string node with interpolation.
19855 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19856
19857 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19858 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19859
19860 pm_interpolated_xstring_node_append(parser->arena, node, part);
19861 } else {
19862 // If the first part of the body of the string is not a string
19863 // content, then we have interpolation and we need to create an
19864 // interpolated string node.
19865 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19866 }
19867
19868 pm_node_t *part;
19869 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19870 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19871 pm_interpolated_xstring_node_append(parser->arena, node, part);
19872 }
19873 }
19874
19875 pm_token_t closing = parser->current;
19876 if (match1(parser, PM_TOKEN_EOF)) {
19877 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19878 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19879 } else {
19880 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19881 }
19882 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
19883
19884 return UP(node);
19885 }
19886 case PM_TOKEN_USTAR: {
19887 parser_lex(parser);
19888
19889 // * operators at the beginning of expressions are only valid in the
19890 // context of a multiple assignment. We enforce that here. We'll
19891 // still lex past it though and create a missing node place.
19892 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19893 pm_parser_err_prefix(parser, diag_id);
19894 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19895 }
19896
19897 pm_token_t operator = parser->previous;
19898 pm_node_t *name = NULL;
19899
19900 if (token_begins_expression_p(parser->current.type)) {
19901 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19902 }
19903
19904 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
19905
19906 if (match1(parser, PM_TOKEN_COMMA)) {
19907 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19908 } else {
19909 return parse_target_validate(parser, splat, true);
19910 }
19911 }
19912 case PM_TOKEN_BANG: {
19913 if (binding_power > PM_BINDING_POWER_UNARY) {
19914 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19915 }
19916
19917 parser_lex(parser);
19918
19919 pm_token_t operator = parser->previous;
19920 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19921 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
19922
19923 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
19924 return UP(node);
19925 }
19926 case PM_TOKEN_TILDE: {
19927 if (binding_power > PM_BINDING_POWER_UNARY) {
19928 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19929 }
19930 parser_lex(parser);
19931
19932 pm_token_t operator = parser->previous;
19933 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19934 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
19935
19936 return UP(node);
19937 }
19938 case PM_TOKEN_UMINUS: {
19939 if (binding_power > PM_BINDING_POWER_UNARY) {
19940 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19941 }
19942 parser_lex(parser);
19943
19944 pm_token_t operator = parser->previous;
19945 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19946 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
19947
19948 return UP(node);
19949 }
19950 case PM_TOKEN_UMINUS_NUM: {
19951 parser_lex(parser);
19952
19953 pm_token_t operator = parser->previous;
19954 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19955
19956 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
19957 pm_token_t exponent_operator = parser->previous;
19958 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
19959 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
19960 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
19961 } else {
19962 switch (PM_NODE_TYPE(node)) {
19963 case PM_INTEGER_NODE:
19964 case PM_FLOAT_NODE:
19965 case PM_RATIONAL_NODE:
19966 case PM_IMAGINARY_NODE:
19967 parse_negative_numeric(node);
19968 break;
19969 default:
19970 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
19971 break;
19972 }
19973 }
19974
19975 return node;
19976 }
19977 case PM_TOKEN_MINUS_GREATER: {
19978 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
19980
19981 size_t opening_newline_index = token_newline_index(parser);
19982 pm_accepts_block_stack_push(parser, true);
19983 parser_lex(parser);
19984
19985 pm_token_t operator = parser->previous;
19986 pm_parser_scope_push(parser, false);
19987
19988 pm_block_parameters_node_t *block_parameters;
19989
19990 switch (parser->current.type) {
19991 case PM_TOKEN_PARENTHESIS_LEFT: {
19992 pm_token_t opening = parser->current;
19993 parser_lex(parser);
19994
19995 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19996 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
19997 } else {
19998 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
19999 }
20000
20001 accept1(parser, PM_TOKEN_NEWLINE);
20002 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20003
20004 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20005 break;
20006 }
20007 case PM_CASE_PARAMETER: {
20008 pm_accepts_block_stack_push(parser, false);
20009 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20010 pm_accepts_block_stack_pop(parser);
20011 break;
20012 }
20013 default: {
20014 block_parameters = NULL;
20015 break;
20016 }
20017 }
20018
20019 pm_token_t opening;
20020 pm_node_t *body = NULL;
20021 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20022
20023 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20024 opening = parser->previous;
20025
20026 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20027 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20028 }
20029
20030 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20031 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20032 } else {
20033 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20034 opening = parser->previous;
20035
20036 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20037 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20038 }
20039
20040 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20041 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20042 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20043 } else {
20044 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20045 }
20046
20047 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20048 }
20049
20050 pm_constant_id_list_t locals;
20051 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20052 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20053
20054 pm_parser_scope_pop(parser);
20055 pm_accepts_block_stack_pop(parser);
20056
20057 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20058 }
20059 case PM_TOKEN_UPLUS: {
20060 if (binding_power > PM_BINDING_POWER_UNARY) {
20061 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20062 }
20063 parser_lex(parser);
20064
20065 pm_token_t operator = parser->previous;
20066 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20067 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20068
20069 return UP(node);
20070 }
20071 case PM_TOKEN_STRING_BEGIN:
20072 return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
20073 case PM_TOKEN_SYMBOL_BEGIN: {
20074 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20075 parser_lex(parser);
20076
20077 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20078 }
20079 default: {
20080 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20081
20082 if (recoverable != PM_CONTEXT_NONE) {
20083 parser->recovering = true;
20084
20085 // If the given error is not the generic one, then we'll add it
20086 // here because it will provide more context in addition to the
20087 // recoverable error that we will also add.
20088 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20089 pm_parser_err_prefix(parser, diag_id);
20090 }
20091
20092 // If we get here, then we are assuming this token is closing a
20093 // parent context, so we'll indicate that to the user so that
20094 // they know how we behaved.
20095 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20096 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20097 // We're going to make a special case here, because "cannot
20098 // parse expression" is pretty generic, and we know here that we
20099 // have an unexpected token.
20100 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20101 } else {
20102 pm_parser_err_prefix(parser, diag_id);
20103 }
20104
20105 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20106 }
20107 }
20108}
20109
20119static pm_node_t *
20120parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20121 pm_node_t *value = parse_value_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), diag_id, (uint16_t) (depth + 1));
20122
20123 // Assignments whose value is a command call (e.g., a = b c) can only
20124 // be followed by modifiers (if/unless/while/until/rescue) and not by
20125 // operators with higher binding power. If we find one, emit an error
20126 // and skip the operator and its right-hand side.
20127 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20128 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
20129 parser_lex(parser);
20130 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20131 }
20132
20133 // Contradicting binding powers, the right-hand-side value of the assignment
20134 // allows the `rescue` modifier.
20135 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20136 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20137
20138 pm_token_t rescue = parser->current;
20139 parser_lex(parser);
20140
20141 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20142 context_pop(parser);
20143
20144 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20145 }
20146
20147 return value;
20148}
20149
20154static void
20155parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20156 switch (PM_NODE_TYPE(node)) {
20157 case PM_BEGIN_NODE: {
20158 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20159 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20160 break;
20161 }
20162 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20164 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20165 break;
20166 }
20167 case PM_PARENTHESES_NODE: {
20168 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20169 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20170 break;
20171 }
20172 case PM_STATEMENTS_NODE: {
20173 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20174 const pm_node_t *statement;
20175
20176 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20177 parse_assignment_value_local(parser, statement);
20178 }
20179 break;
20180 }
20181 default:
20182 break;
20183 }
20184}
20185
20198static pm_node_t *
20199parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20200 bool permitted = true;
20201 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20202
20203 pm_node_t *value = parse_starred_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), diag_id, (uint16_t) (depth + 1));
20204 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20205
20206 parse_assignment_value_local(parser, value);
20207 bool single_value = true;
20208
20209 // Block calls (command call + do block, e.g., `foo bar do end`) cannot
20210 // be followed by a comma to form a multi-value RHS because each element
20211 // of a multi-value assignment must be an `arg`, not a `block_call`.
20212 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20213 single_value = false;
20214
20215 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20216 pm_array_node_elements_append(parser->arena, array, value);
20217 value = UP(array);
20218
20219 while (accept1(parser, PM_TOKEN_COMMA)) {
20220 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20221
20222 pm_array_node_elements_append(parser->arena, array, element);
20223 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20224
20225 parse_assignment_value_local(parser, element);
20226 }
20227 }
20228
20229 // Assignments whose value is a command call (e.g., a = b c) can only
20230 // be followed by modifiers (if/unless/while/until/rescue) and not by
20231 // operators with higher binding power. If we find one, emit an error
20232 // and skip the operator and its right-hand side.
20233 if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20234 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
20235 parser_lex(parser);
20236 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20237 }
20238
20239 // Contradicting binding powers, the right-hand-side value of the assignment
20240 // allows the `rescue` modifier.
20241 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20242 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20243
20244 pm_token_t rescue = parser->current;
20245 parser_lex(parser);
20246
20247 bool accepts_command_call_inner = false;
20248
20249 // RHS can accept command call iff the value is a call with arguments
20250 // but without parenthesis.
20251 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20252 pm_call_node_t *call_node = (pm_call_node_t *) value;
20253 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20254 accepts_command_call_inner = true;
20255 }
20256 }
20257
20258 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20259 context_pop(parser);
20260
20261 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20262 }
20263
20264 return value;
20265}
20266
20274static void
20275parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20276 if (call_node->arguments != NULL) {
20277 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20278 pm_node_unreference(parser, UP(call_node->arguments));
20279 call_node->arguments = NULL;
20280 }
20281
20282 if (call_node->block != NULL) {
20283 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20284 pm_node_unreference(parser, UP(call_node->block));
20285 call_node->block = NULL;
20286 }
20287}
20288
20289static inline const uint8_t *
20290pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20291 cursor++;
20292
20293 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20294 uint8_t value = escape_hexadecimal_digit(*cursor);
20295 cursor++;
20296
20297 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20298 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20299 cursor++;
20300 }
20301
20302 pm_buffer_append_byte(unescaped, value);
20303 } else {
20304 pm_buffer_append_string(unescaped, "\\x", 2);
20305 }
20306
20307 return cursor;
20308}
20309
20310static inline const uint8_t *
20311pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20312 uint8_t value = (uint8_t) (*cursor - '0');
20313 cursor++;
20314
20315 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20316 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20317 cursor++;
20318
20319 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20320 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20321 cursor++;
20322 }
20323 }
20324
20325 pm_buffer_append_byte(unescaped, value);
20326 return cursor;
20327}
20328
20329static inline const uint8_t *
20330pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20331 const uint8_t *start = cursor - 1;
20332 cursor++;
20333
20334 if (cursor >= end) {
20335 pm_buffer_append_string(unescaped, "\\u", 2);
20336 return cursor;
20337 }
20338
20339 if (*cursor != '{') {
20340 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20341 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20342
20343 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20344 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20345 }
20346
20347 return cursor + length;
20348 }
20349
20350 cursor++;
20351 for (;;) {
20352 while (cursor < end && *cursor == ' ') cursor++;
20353
20354 if (cursor >= end) break;
20355 if (*cursor == '}') {
20356 cursor++;
20357 break;
20358 }
20359
20360 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20361 if (length == 0) {
20362 break;
20363 }
20364 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20365
20366 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20367 cursor += length;
20368 }
20369
20370 return cursor;
20371}
20372
20373static void
20374pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20375 const uint8_t *end = source + length;
20376 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20377
20378 for (;;) {
20379 if (++cursor >= end) {
20380 pm_buffer_append_byte(unescaped, '\\');
20381 return;
20382 }
20383
20384 switch (*cursor) {
20385 case 'x':
20386 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20387 break;
20388 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20389 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20390 break;
20391 case 'u':
20392 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20393 break;
20394 default:
20395 pm_buffer_append_byte(unescaped, '\\');
20396 break;
20397 }
20398
20399 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20400 if (next_cursor == NULL) break;
20401
20402 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20403 cursor = next_cursor;
20404 }
20405
20406 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20407}
20408
20413static void
20414parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
20415 pm_call_node_t *call = callback_data->call;
20416 pm_constant_id_list_t *names = &callback_data->names;
20417
20418 const uint8_t *source = pm_string_source(capture);
20419 size_t length = pm_string_length(capture);
20420 pm_buffer_t unescaped = { 0 };
20421
20422 // First, we need to handle escapes within the name of the capture group.
20423 // This is because regular expressions have three different representations
20424 // in prism. The first is the plain source code. The second is the
20425 // representation that will be sent to the regular expression engine, which
20426 // is the value of the "unescaped" field. This is poorly named, because it
20427 // actually still contains escapes, just a subset of them that the regular
20428 // expression engine knows how to handle. The third representation is fully
20429 // unescaped, which is what we need.
20430 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20431 if (PRISM_UNLIKELY(cursor != NULL)) {
20432 pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
20433 source = (const uint8_t *) pm_buffer_value(&unescaped);
20434 length = pm_buffer_length(&unescaped);
20435 }
20436
20437 const uint8_t *start;
20438 const uint8_t *end;
20439 pm_constant_id_t name;
20440
20441 // If the name of the capture group isn't a valid identifier, we do
20442 // not add it to the local table.
20443 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20444 pm_buffer_free(&unescaped);
20445 return;
20446 }
20447
20448 if (shared) {
20449 // If the unescaped string is a slice of the source, then we can
20450 // copy the names directly. The pointers will line up.
20451 start = source;
20452 end = source + length;
20453 name = pm_parser_constant_id_raw(parser, start, end);
20454 } else {
20455 // Otherwise, the name is a slice of the malloc-ed owned string,
20456 // in which case we need to copy it out into a new string.
20457 start = parser->start + PM_NODE_START(call->receiver);
20458 end = parser->start + PM_NODE_END(call->receiver);
20459
20460 void *memory = xmalloc(length);
20461 if (memory == NULL) abort();
20462
20463 memcpy(memory, source, length);
20464 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20465 }
20466
20467 // Add this name to the list of constants if it is valid, not duplicated,
20468 // and not a keyword.
20469 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20470 pm_constant_id_list_append(parser->arena, names, name);
20471
20472 int depth;
20473 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20474 // If the local is not already a local but it is a keyword, then we
20475 // do not want to add a capture for this.
20476 if (pm_local_is_keyword((const char *) source, length)) {
20477 pm_buffer_free(&unescaped);
20478 return;
20479 }
20480
20481 // If the identifier is not already a local, then we will add it to
20482 // the local table.
20483 pm_parser_local_add(parser, name, start, end, 0);
20484 }
20485
20486 // Here we lazily create the MatchWriteNode since we know we're
20487 // about to add a target.
20488 if (callback_data->match == NULL) {
20489 callback_data->match = pm_match_write_node_create(parser, call);
20490 }
20491
20492 // Next, create the local variable target and add it to the list of
20493 // targets for the match.
20494 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20495 pm_node_list_append(parser->arena, &callback_data->match->targets, target);
20496 }
20497
20498 pm_buffer_free(&unescaped);
20499}
20500
20506static pm_node_t *
20507parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20508 pm_regexp_name_data_t callback_data = {
20509 .call = call,
20510 .match = NULL,
20511 .names = { 0 },
20512 };
20513
20514 pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
20515
20516 if (callback_data.match != NULL) {
20517 return UP(callback_data.match);
20518 } else {
20519 return UP(call);
20520 }
20521}
20522
20523static inline pm_node_t *
20524parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
20525 pm_token_t token = parser->current;
20526
20527 switch (token.type) {
20528 case PM_TOKEN_EQUAL: {
20529 switch (PM_NODE_TYPE(node)) {
20530 case PM_CALL_NODE: {
20531 // If we have no arguments to the call node and we need this
20532 // to be a target then this is either a method call or a
20533 // local variable write. This _must_ happen before the value
20534 // is parsed because it could be referenced in the value.
20535 pm_call_node_t *call_node = (pm_call_node_t *) node;
20536 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20537 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
20538 }
20539 }
20541 case PM_CASE_WRITABLE: {
20542 // When we have `it = value`, we need to add `it` as a local
20543 // variable before parsing the value, in case the value
20544 // references the variable.
20545 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20546 pm_parser_local_add_location(parser, &node->location, 0);
20547 }
20548
20549 parser_lex(parser);
20550 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20551
20552 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20553 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20554 }
20555
20556 return parse_write(parser, node, &token, value);
20557 }
20558 case PM_SPLAT_NODE: {
20559 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20560 pm_multi_target_node_targets_append(parser, multi_target, node);
20561
20562 parser_lex(parser);
20563 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20564 return parse_write(parser, UP(multi_target), &token, value);
20565 }
20566 case PM_SOURCE_ENCODING_NODE:
20567 case PM_FALSE_NODE:
20568 case PM_SOURCE_FILE_NODE:
20569 case PM_SOURCE_LINE_NODE:
20570 case PM_NIL_NODE:
20571 case PM_SELF_NODE:
20572 case PM_TRUE_NODE: {
20573 // In these special cases, we have specific error messages
20574 // and we will replace them with local variable writes.
20575 parser_lex(parser);
20576 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20577 return parse_unwriteable_write(parser, node, &token, value);
20578 }
20579 default:
20580 // In this case we have an = sign, but we don't know what
20581 // it's for. We need to treat it as an error. We'll mark it
20582 // as an error and skip past it.
20583 parser_lex(parser);
20584 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20585 return node;
20586 }
20587 }
20588 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20589 switch (PM_NODE_TYPE(node)) {
20590 case PM_BACK_REFERENCE_READ_NODE:
20591 case PM_NUMBERED_REFERENCE_READ_NODE:
20592 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20594 case PM_GLOBAL_VARIABLE_READ_NODE: {
20595 parser_lex(parser);
20596
20597 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20598 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20599
20600 return result;
20601 }
20602 case PM_CLASS_VARIABLE_READ_NODE: {
20603 parser_lex(parser);
20604
20605 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20606 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20607
20608 return result;
20609 }
20610 case PM_CONSTANT_PATH_NODE: {
20611 parser_lex(parser);
20612
20613 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20614 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20615
20616 return parse_shareable_constant_write(parser, write);
20617 }
20618 case PM_CONSTANT_READ_NODE: {
20619 parser_lex(parser);
20620
20621 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20622 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20623
20624 return parse_shareable_constant_write(parser, write);
20625 }
20626 case PM_INSTANCE_VARIABLE_READ_NODE: {
20627 parser_lex(parser);
20628
20629 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20630 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20631
20632 return result;
20633 }
20634 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20635 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20636 parser_lex(parser);
20637
20638 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20639 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20640
20641 pm_node_unreference(parser, node);
20642 return result;
20643 }
20644 case PM_LOCAL_VARIABLE_READ_NODE: {
20645 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20646 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
20647 pm_node_unreference(parser, node);
20648 }
20649
20651 parser_lex(parser);
20652
20653 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20654 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20655
20656 return result;
20657 }
20658 case PM_CALL_NODE: {
20659 pm_call_node_t *cast = (pm_call_node_t *) node;
20660
20661 // If we have a vcall (a method with no arguments and no
20662 // receiver that could have been a local variable) then we
20663 // will transform it into a local variable write.
20664 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20665 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20666 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20667 parser_lex(parser);
20668
20669 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20670 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20671
20672 return result;
20673 }
20674
20675 // Move past the token here so that we have already added
20676 // the local variable by this point.
20677 parser_lex(parser);
20678
20679 // If there is no call operator and the message is "[]" then
20680 // this is an aref expression, and we can transform it into
20681 // an aset expression.
20682 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20683 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20684 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20685 }
20686
20687 // If this node cannot be writable, then we have an error.
20688 if (pm_call_node_writable_p(parser, cast)) {
20689 parse_write_name(parser, &cast->name);
20690 } else {
20691 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20692 }
20693
20694 parse_call_operator_write(parser, cast, &token);
20695 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20696 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20697 }
20698 case PM_MULTI_WRITE_NODE: {
20699 parser_lex(parser);
20700 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20701 return node;
20702 }
20703 default:
20704 parser_lex(parser);
20705
20706 // In this case we have an &&= sign, but we don't know what it's for.
20707 // We need to treat it as an error. For now, we'll mark it as an error
20708 // and just skip right past it.
20709 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20710 return node;
20711 }
20712 }
20713 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20714 switch (PM_NODE_TYPE(node)) {
20715 case PM_BACK_REFERENCE_READ_NODE:
20716 case PM_NUMBERED_REFERENCE_READ_NODE:
20717 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20719 case PM_GLOBAL_VARIABLE_READ_NODE: {
20720 parser_lex(parser);
20721
20722 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20723 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20724
20725 return result;
20726 }
20727 case PM_CLASS_VARIABLE_READ_NODE: {
20728 parser_lex(parser);
20729
20730 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20731 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20732
20733 return result;
20734 }
20735 case PM_CONSTANT_PATH_NODE: {
20736 parser_lex(parser);
20737
20738 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20739 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20740
20741 return parse_shareable_constant_write(parser, write);
20742 }
20743 case PM_CONSTANT_READ_NODE: {
20744 parser_lex(parser);
20745
20746 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20747 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20748
20749 return parse_shareable_constant_write(parser, write);
20750 }
20751 case PM_INSTANCE_VARIABLE_READ_NODE: {
20752 parser_lex(parser);
20753
20754 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20755 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20756
20757 return result;
20758 }
20759 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20760 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20761 parser_lex(parser);
20762
20763 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20764 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20765
20766 pm_node_unreference(parser, node);
20767 return result;
20768 }
20769 case PM_LOCAL_VARIABLE_READ_NODE: {
20770 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20771 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
20772 pm_node_unreference(parser, node);
20773 }
20774
20776 parser_lex(parser);
20777
20778 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20779 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20780
20781 return result;
20782 }
20783 case PM_CALL_NODE: {
20784 pm_call_node_t *cast = (pm_call_node_t *) node;
20785
20786 // If we have a vcall (a method with no arguments and no
20787 // receiver that could have been a local variable) then we
20788 // will transform it into a local variable write.
20789 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20790 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20791 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20792 parser_lex(parser);
20793
20794 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20795 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20796
20797 return result;
20798 }
20799
20800 // Move past the token here so that we have already added
20801 // the local variable by this point.
20802 parser_lex(parser);
20803
20804 // If there is no call operator and the message is "[]" then
20805 // this is an aref expression, and we can transform it into
20806 // an aset expression.
20807 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20808 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20809 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20810 }
20811
20812 // If this node cannot be writable, then we have an error.
20813 if (pm_call_node_writable_p(parser, cast)) {
20814 parse_write_name(parser, &cast->name);
20815 } else {
20816 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20817 }
20818
20819 parse_call_operator_write(parser, cast, &token);
20820 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20821 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20822 }
20823 case PM_MULTI_WRITE_NODE: {
20824 parser_lex(parser);
20825 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20826 return node;
20827 }
20828 default:
20829 parser_lex(parser);
20830
20831 // In this case we have an ||= sign, but we don't know what it's for.
20832 // We need to treat it as an error. For now, we'll mark it as an error
20833 // and just skip right past it.
20834 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20835 return node;
20836 }
20837 }
20838 case PM_TOKEN_AMPERSAND_EQUAL:
20839 case PM_TOKEN_CARET_EQUAL:
20840 case PM_TOKEN_GREATER_GREATER_EQUAL:
20841 case PM_TOKEN_LESS_LESS_EQUAL:
20842 case PM_TOKEN_MINUS_EQUAL:
20843 case PM_TOKEN_PERCENT_EQUAL:
20844 case PM_TOKEN_PIPE_EQUAL:
20845 case PM_TOKEN_PLUS_EQUAL:
20846 case PM_TOKEN_SLASH_EQUAL:
20847 case PM_TOKEN_STAR_EQUAL:
20848 case PM_TOKEN_STAR_STAR_EQUAL: {
20849 switch (PM_NODE_TYPE(node)) {
20850 case PM_BACK_REFERENCE_READ_NODE:
20851 case PM_NUMBERED_REFERENCE_READ_NODE:
20852 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20854 case PM_GLOBAL_VARIABLE_READ_NODE: {
20855 parser_lex(parser);
20856
20857 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20858 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20859
20860 return result;
20861 }
20862 case PM_CLASS_VARIABLE_READ_NODE: {
20863 parser_lex(parser);
20864
20865 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20866 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20867
20868 return result;
20869 }
20870 case PM_CONSTANT_PATH_NODE: {
20871 parser_lex(parser);
20872
20873 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20874 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20875
20876 return parse_shareable_constant_write(parser, write);
20877 }
20878 case PM_CONSTANT_READ_NODE: {
20879 parser_lex(parser);
20880
20881 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20882 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20883
20884 return parse_shareable_constant_write(parser, write);
20885 }
20886 case PM_INSTANCE_VARIABLE_READ_NODE: {
20887 parser_lex(parser);
20888
20889 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20890 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20891
20892 return result;
20893 }
20894 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20895 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20896 parser_lex(parser);
20897
20898 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20899 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
20900
20901 pm_node_unreference(parser, node);
20902 return result;
20903 }
20904 case PM_LOCAL_VARIABLE_READ_NODE: {
20905 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20906 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
20907 pm_node_unreference(parser, node);
20908 }
20909
20911 parser_lex(parser);
20912
20913 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20914 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20915
20916 return result;
20917 }
20918 case PM_CALL_NODE: {
20919 parser_lex(parser);
20920 pm_call_node_t *cast = (pm_call_node_t *) node;
20921
20922 // If we have a vcall (a method with no arguments and no
20923 // receiver that could have been a local variable) then we
20924 // will transform it into a local variable write.
20925 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20926 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20927 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20928 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20929 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20930
20931 return result;
20932 }
20933
20934 // If there is no call operator and the message is "[]" then
20935 // this is an aref expression, and we can transform it into
20936 // an aset expression.
20937 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20938 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20939 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
20940 }
20941
20942 // If this node cannot be writable, then we have an error.
20943 if (pm_call_node_writable_p(parser, cast)) {
20944 parse_write_name(parser, &cast->name);
20945 } else {
20946 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20947 }
20948
20949 parse_call_operator_write(parser, cast, &token);
20950 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20951 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
20952 }
20953 case PM_MULTI_WRITE_NODE: {
20954 parser_lex(parser);
20955 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
20956 return node;
20957 }
20958 default:
20959 parser_lex(parser);
20960
20961 // In this case we have an operator but we don't know what it's for.
20962 // We need to treat it as an error. For now, we'll mark it as an error
20963 // and just skip right past it.
20964 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
20965 return node;
20966 }
20967 }
20968 case PM_TOKEN_AMPERSAND_AMPERSAND:
20969 case PM_TOKEN_KEYWORD_AND: {
20970 parser_lex(parser);
20971
20972 pm_node_t *right = parse_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20973 return UP(pm_and_node_create(parser, node, &token, right));
20974 }
20975 case PM_TOKEN_KEYWORD_OR:
20976 case PM_TOKEN_PIPE_PIPE: {
20977 parser_lex(parser);
20978
20979 pm_node_t *right = parse_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20980 return UP(pm_or_node_create(parser, node, &token, right));
20981 }
20982 case PM_TOKEN_EQUAL_TILDE: {
20983 // Note that we _must_ parse the value before adding the local
20984 // variables in order to properly mirror the behavior of Ruby. For
20985 // example,
20986 //
20987 // /(?<foo>bar)/ =~ foo
20988 //
20989 // In this case, `foo` should be a method call and not a local yet.
20990 parser_lex(parser);
20991 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20992
20993 // By default, we're going to create a call node and then return it.
20994 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
20995 pm_node_t *result = UP(call);
20996
20997 // If the receiver of this =~ is a regular expression node, then we
20998 // need to introduce local variables for it based on its named
20999 // capture groups.
21000 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21001 // It's possible to have an interpolated regular expression node
21002 // that only contains strings. This is because it can be split
21003 // up by a heredoc. In this case we need to concat the unescaped
21004 // strings together and then parse them as a regular expression.
21006
21007 bool interpolated = false;
21008 size_t total_length = 0;
21009
21010 pm_node_t *part;
21011 PM_NODE_LIST_FOREACH(parts, index, part) {
21012 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21013 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21014 } else {
21015 interpolated = true;
21016 break;
21017 }
21018 }
21019
21020 if (!interpolated && total_length > 0) {
21021 void *memory = xmalloc(total_length);
21022 if (!memory) abort();
21023
21024 uint8_t *cursor = memory;
21025 PM_NODE_LIST_FOREACH(parts, index, part) {
21026 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21027 size_t length = pm_string_length(unescaped);
21028
21029 memcpy(cursor, pm_string_source(unescaped), length);
21030 cursor += length;
21031 }
21032
21033 pm_string_t owned;
21034 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21035
21036 result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21037 pm_string_free(&owned);
21038 }
21039 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21040 // If we have a regular expression node, then we can parse
21041 // the named captures and validate encoding in one pass.
21043
21044 pm_regexp_name_data_t name_data = {
21045 .call = call,
21046 .match = NULL,
21047 .names = { 0 },
21048 };
21049
21050 pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
21051
21052 if (name_data.match != NULL) {
21053 result = UP(name_data.match);
21054 }
21055 }
21056
21057 return result;
21058 }
21059 case PM_TOKEN_UAMPERSAND:
21060 case PM_TOKEN_USTAR:
21061 case PM_TOKEN_USTAR_STAR:
21062 // The only times this will occur are when we are in an error state,
21063 // but we'll put them in here so that errors can propagate.
21064 case PM_TOKEN_BANG_EQUAL:
21065 case PM_TOKEN_BANG_TILDE:
21066 case PM_TOKEN_EQUAL_EQUAL:
21067 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21068 case PM_TOKEN_LESS_EQUAL_GREATER:
21069 case PM_TOKEN_CARET:
21070 case PM_TOKEN_PIPE:
21071 case PM_TOKEN_AMPERSAND:
21072 case PM_TOKEN_GREATER_GREATER:
21073 case PM_TOKEN_LESS_LESS:
21074 case PM_TOKEN_MINUS:
21075 case PM_TOKEN_PLUS:
21076 case PM_TOKEN_PERCENT:
21077 case PM_TOKEN_SLASH:
21078 case PM_TOKEN_STAR:
21079 case PM_TOKEN_STAR_STAR: {
21080 parser_lex(parser);
21081 pm_token_t operator = parser->previous;
21082 switch (PM_NODE_TYPE(node)) {
21083 case PM_RESCUE_MODIFIER_NODE: {
21085 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21086 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21087 }
21088 break;
21089 }
21090 case PM_AND_NODE: {
21091 pm_and_node_t *cast = (pm_and_node_t *) node;
21092 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21093 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21094 }
21095 break;
21096 }
21097 case PM_OR_NODE: {
21098 pm_or_node_t *cast = (pm_or_node_t *) node;
21099 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21100 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21101 }
21102 break;
21103 }
21104 default:
21105 break;
21106 }
21107
21108 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21109 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21110 }
21111 case PM_TOKEN_GREATER:
21112 case PM_TOKEN_GREATER_EQUAL:
21113 case PM_TOKEN_LESS:
21114 case PM_TOKEN_LESS_EQUAL: {
21115 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21116 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21117 }
21118
21119 parser_lex(parser);
21120 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21121 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21122 }
21123 case PM_TOKEN_AMPERSAND_DOT:
21124 case PM_TOKEN_DOT: {
21125 parser_lex(parser);
21126 pm_token_t operator = parser->previous;
21127 pm_arguments_t arguments = { 0 };
21128
21129 // This if statement handles the foo.() syntax.
21130 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21131 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21132 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21133 }
21134
21135 switch (PM_NODE_TYPE(node)) {
21136 case PM_RESCUE_MODIFIER_NODE: {
21138 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21139 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21140 }
21141 break;
21142 }
21143 case PM_AND_NODE: {
21144 pm_and_node_t *cast = (pm_and_node_t *) node;
21145 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21146 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21147 }
21148 break;
21149 }
21150 case PM_OR_NODE: {
21151 pm_or_node_t *cast = (pm_or_node_t *) node;
21152 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21153 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21154 }
21155 break;
21156 }
21157 default:
21158 break;
21159 }
21160
21161 pm_token_t message;
21162
21163 switch (parser->current.type) {
21164 case PM_CASE_OPERATOR:
21165 case PM_CASE_KEYWORD:
21166 case PM_TOKEN_CONSTANT:
21167 case PM_TOKEN_IDENTIFIER:
21168 case PM_TOKEN_METHOD_NAME: {
21169 parser_lex(parser);
21170 message = parser->previous;
21171 break;
21172 }
21173 default: {
21174 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21175 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21176 }
21177 }
21178
21179 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21180 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21181
21182 if (
21183 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21184 arguments.arguments == NULL &&
21185 arguments.opening_loc.length == 0 &&
21186 match1(parser, PM_TOKEN_COMMA)
21187 ) {
21188 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21189 } else {
21190 return UP(call);
21191 }
21192 }
21193 case PM_TOKEN_DOT_DOT:
21194 case PM_TOKEN_DOT_DOT_DOT: {
21195 parser_lex(parser);
21196
21197 pm_node_t *right = NULL;
21198 if (token_begins_expression_p(parser->current.type)) {
21199 right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21200 }
21201
21202 return UP(pm_range_node_create(parser, node, &token, right));
21203 }
21204 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21205 pm_token_t keyword = parser->current;
21206 parser_lex(parser);
21207
21208 pm_node_t *predicate = parse_value_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21209 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21210 }
21211 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21212 pm_token_t keyword = parser->current;
21213 parser_lex(parser);
21214
21215 pm_node_t *predicate = parse_value_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21216 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21217 }
21218 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21219 parser_lex(parser);
21220 pm_statements_node_t *statements = pm_statements_node_create(parser);
21221 pm_statements_node_body_append(parser, statements, node, true);
21222
21223 pm_node_t *predicate = parse_value_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21224 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21225 }
21226 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21227 parser_lex(parser);
21228 pm_statements_node_t *statements = pm_statements_node_create(parser);
21229 pm_statements_node_body_append(parser, statements, node, true);
21230
21231 pm_node_t *predicate = parse_value_expression(parser, binding_power, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21232 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21233 }
21234 case PM_TOKEN_QUESTION_MARK: {
21235 context_push(parser, PM_CONTEXT_TERNARY);
21236 pm_node_list_t current_block_exits = { 0 };
21237 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21238
21239 pm_token_t qmark = parser->current;
21240 parser_lex(parser);
21241
21242 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21243
21244 if (parser->recovering) {
21245 // If parsing the true expression of this ternary resulted in a syntax
21246 // error that we can recover from, then we're going to put missing nodes
21247 // and tokens into the remaining places. We want to be sure to do this
21248 // before the `expect` function call to make sure it doesn't
21249 // accidentally move past a ':' token that occurs after the syntax
21250 // error.
21251 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21252 pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21253
21254 context_pop(parser);
21255 pop_block_exits(parser, previous_block_exits);
21256 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21257 }
21258
21259 accept1(parser, PM_TOKEN_NEWLINE);
21260 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21261
21262 pm_token_t colon = parser->previous;
21263 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21264
21265 context_pop(parser);
21266 pop_block_exits(parser, previous_block_exits);
21267 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21268 }
21269 case PM_TOKEN_COLON_COLON: {
21270 parser_lex(parser);
21271 pm_token_t delimiter = parser->previous;
21272
21273 switch (parser->current.type) {
21274 case PM_TOKEN_CONSTANT: {
21275 parser_lex(parser);
21276 pm_node_t *path;
21277
21278 if (
21279 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21280 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21281 ) {
21282 // If we have a constant immediately following a '::' operator, then
21283 // this can either be a constant path or a method call, depending on
21284 // what follows the constant.
21285 //
21286 // If we have parentheses, then this is a method call. That would
21287 // look like Foo::Bar().
21288 pm_token_t message = parser->previous;
21289 pm_arguments_t arguments = { 0 };
21290
21291 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21292 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21293 } else {
21294 // Otherwise, this is a constant path. That would look like Foo::Bar.
21295 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21296 }
21297
21298 // If this is followed by a comma then it is a multiple assignment.
21299 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21300 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21301 }
21302
21303 return path;
21304 }
21305 case PM_CASE_OPERATOR:
21306 case PM_CASE_KEYWORD:
21307 case PM_TOKEN_IDENTIFIER:
21308 case PM_TOKEN_METHOD_NAME: {
21309 parser_lex(parser);
21310 pm_token_t message = parser->previous;
21311
21312 // If we have an identifier following a '::' operator, then it is for
21313 // sure a method call.
21314 pm_arguments_t arguments = { 0 };
21315 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21316 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21317
21318 // If this is followed by a comma then it is a multiple assignment.
21319 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21320 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21321 }
21322
21323 return UP(call);
21324 }
21325 case PM_TOKEN_PARENTHESIS_LEFT: {
21326 // If we have a parenthesis following a '::' operator, then it is the
21327 // method call shorthand. That would look like Foo::(bar).
21328 pm_arguments_t arguments = { 0 };
21329 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21330
21331 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21332 }
21333 default: {
21334 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21335 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21336 }
21337 }
21338 }
21339 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21340 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21341 parser_lex(parser);
21342 accept1(parser, PM_TOKEN_NEWLINE);
21343
21344 pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21345 context_pop(parser);
21346
21347 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21348 }
21349 case PM_TOKEN_BRACKET_LEFT: {
21350 parser_lex(parser);
21351
21352 pm_arguments_t arguments = { 0 };
21353 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21354
21355 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21356 pm_accepts_block_stack_push(parser, true);
21357 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
21358 pm_accepts_block_stack_pop(parser);
21359 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21360 }
21361
21362 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21363
21364 // If we have a comma after the closing bracket then this is a multiple
21365 // assignment and we should parse the targets.
21366 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21367 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21368 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21369 }
21370
21371 // If we're at the end of the arguments, we can now check if there is a
21372 // block node that starts with a {. If there is, then we can parse it and
21373 // add it to the arguments.
21374 pm_block_node_t *block = NULL;
21375 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21376 block = parse_block(parser, (uint16_t) (depth + 1));
21377 pm_arguments_validate_block(parser, &arguments, block);
21378 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21379 block = parse_block(parser, (uint16_t) (depth + 1));
21380 }
21381
21382 if (block != NULL) {
21383 if (arguments.block != NULL) {
21384 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21385 if (arguments.arguments == NULL) {
21386 arguments.arguments = pm_arguments_node_create(parser);
21387 }
21388 pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
21389 }
21390
21391 arguments.block = UP(block);
21392 }
21393
21394 return UP(pm_call_node_aref_create(parser, node, &arguments));
21395 }
21396 case PM_TOKEN_KEYWORD_IN: {
21397 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21398 parser->pattern_matching_newlines = true;
21399
21400 pm_token_t operator = parser->current;
21401 parser->command_start = false;
21402 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21403 parser_lex(parser);
21404
21405 pm_constant_id_list_t captures = { 0 };
21406 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21407
21408 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21409
21410 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21411 }
21412 case PM_TOKEN_EQUAL_GREATER: {
21413 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21414 parser->pattern_matching_newlines = true;
21415
21416 pm_token_t operator = parser->current;
21417 parser->command_start = false;
21418 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21419 parser_lex(parser);
21420
21421 pm_constant_id_list_t captures = { 0 };
21422 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21423
21424 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21425
21426 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21427 }
21428 default:
21429 assert(false && "unreachable");
21430 return NULL;
21431 }
21432}
21433
// Retire the PM_PARSE_PATTERN_* flag macros so that they are not visible to
// any of the code that follows.
#undef PM_PARSE_PATTERN_MULTI
#undef PM_PARSE_PATTERN_TOP
#undef PM_PARSE_PATTERN_SINGLE
21437
21450static bool
21451parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
21452 pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
21453
21454 switch (PM_NODE_TYPE(node)) {
21455 case PM_MULTI_WRITE_NODE:
21456 case PM_RETURN_NODE:
21457 case PM_BREAK_NODE:
21458 case PM_NEXT_NODE:
21459 return left > PM_BINDING_POWER_MODIFIER;
21460 case PM_CLASS_VARIABLE_WRITE_NODE:
21461 case PM_CONSTANT_PATH_WRITE_NODE:
21462 case PM_CONSTANT_WRITE_NODE:
21463 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21464 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21465 case PM_LOCAL_VARIABLE_WRITE_NODE:
21466 return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
21467 case PM_CALL_NODE: {
21468 // Calls with an implicit array on the right-hand side are
21469 // statements and can only be followed by modifiers.
21470 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
21471 return left > PM_BINDING_POWER_MODIFIER;
21472 }
21473
21474 // Command-style calls (including block commands like
21475 // `foo bar do end`) can only be followed by composition
21476 // (and/or) and modifier (if/unless/etc.) operators.
21477 if (pm_command_call_value_p(node)) {
21478 return left > PM_BINDING_POWER_COMPOSITION;
21479 }
21480
21481 // A block call (command with do-block, or any call chained
21482 // from one) can only be followed by call chaining (., ::,
21483 // &.), composition (and/or), and modifier operators.
21484 if (pm_block_call_p(node)) {
21485 return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
21486 }
21487
21488 return false;
21489 }
21490 case PM_SUPER_NODE:
21491 case PM_YIELD_NODE:
21492 // Command-style super/yield (without parens) can only be followed
21493 // by composition and modifier operators.
21494 if (pm_command_call_value_p(node)) {
21495 return left > PM_BINDING_POWER_COMPOSITION;
21496 }
21497 return false;
21498 case PM_DEF_NODE:
21499 // An endless method whose body is a command-style call (e.g.,
21500 // `def f = foo bar`) is a command assignment and can only be
21501 // followed by modifiers.
21502 return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
21503 case PM_RESCUE_MODIFIER_NODE:
21504 // A rescue modifier whose handler is a pattern match (=> or in)
21505 // produces a statement and cannot be followed by operators above
21506 // the modifier level.
21507 if (left > PM_BINDING_POWER_MODIFIER) {
21509 pm_node_t *rescue_expression = cast->rescue_expression;
21510 return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
21511 }
21512 return false;
21513 default:
21514 return false;
21515 }
21516}
21517
21526static pm_node_t *
21527parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
21528 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21529 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21530 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21531 }
21532
21533 pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
21534
21535 // Some prefix nodes are statements and can only be followed by modifiers
21536 // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
21537 // here before entering the infix loop.
21538 switch (PM_NODE_TYPE(node)) {
21539 case PM_MISSING_NODE:
21540 return node;
21541 case PM_PRE_EXECUTION_NODE:
21542 return node;
21543 case PM_POST_EXECUTION_NODE:
21544 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21545 case PM_ALIAS_METHOD_NODE:
21546 case PM_UNDEF_NODE:
21547 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21548 return node;
21549 }
21550 break;
21551 case PM_CALL_NODE:
21552 case PM_SUPER_NODE:
21553 case PM_YIELD_NODE:
21554 case PM_DEF_NODE:
21555 if (parse_expression_terminator(parser, node)) {
21556 return node;
21557 }
21558 break;
21559 case PM_SYMBOL_NODE:
21560 if (pm_symbol_node_label_p(parser, node)) {
21561 return node;
21562 }
21563 break;
21564 default:
21565 break;
21566 }
21567
21568 // Look and see if the next token can be parsed as an infix operator. If it
21569 // can, then we'll parse it using parse_expression_infix.
21570 pm_binding_powers_t current_binding_powers;
21571 pm_token_type_t current_token_type;
21572
21573 while (
21574 current_token_type = parser->current.type,
21575 current_binding_powers = pm_binding_powers[current_token_type],
21576 binding_power <= current_binding_powers.left &&
21577 current_binding_powers.binary
21578 ) {
21579 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
21580 if (parse_expression_terminator(parser, node)) return node;
21581
21582 // If the operator is nonassoc and we should not be able to parse the
21583 // upcoming infix operator, break.
21584 if (current_binding_powers.nonassoc) {
21585 // If this is a non-assoc operator and we are about to parse the
21586 // exact same operator, then we need to add an error.
21587 if (match1(parser, current_token_type)) {
21588 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21589 break;
21590 }
21591
21592 // If this is an endless range, then we need to reject a couple of
21593 // additional operators because it violates the normal operator
21594 // precedence rules. Those patterns are:
21595 //
21596 // 1.. & 2
21597 // 1.. * 2
21598 //
21599 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21600 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21601 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21602 break;
21603 }
21604
21605 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21606 break;
21607 }
21608 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21609 break;
21610 }
21611 }
21612
21613 if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
21614 // A command-style method call is only accepted on method chains.
21615 // Thus, we check whether the parsed node can continue method chains.
21616 // The method chain can continue if the parsed node is one of the following five kinds:
21617 // (1) index access: foo[1]
21618 // (2) attribute access: foo.bar
21619 // (3) method call with parenthesis: foo.bar(1)
21620 // (4) method call with a block: foo.bar do end
21621 // (5) constant path: foo::Bar
21622 switch (node->type) {
21623 case PM_CALL_NODE: {
21624 pm_call_node_t *cast = (pm_call_node_t *)node;
21625 if (
21626 // (1) foo[1]
21627 !(
21628 cast->call_operator_loc.length == 0 &&
21629 cast->message_loc.length > 0 &&
21630 parser->start[cast->message_loc.start] == '[' &&
21631 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
21632 ) &&
21633 // (2) foo.bar
21634 !(
21635 cast->call_operator_loc.length > 0 &&
21636 cast->arguments == NULL &&
21637 cast->block == NULL &&
21638 cast->opening_loc.length == 0
21639 ) &&
21640 // (3) foo.bar(1)
21641 !(
21642 cast->call_operator_loc.length > 0 &&
21643 cast->opening_loc.length > 0
21644 ) &&
21645 // (4) foo.bar do end
21646 !(
21647 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21648 )
21649 ) {
21650 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
21651 }
21652 break;
21653 }
21654 // (5) foo::Bar
21655 case PM_CONSTANT_PATH_NODE:
21656 break;
21657 default:
21658 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
21659 break;
21660 }
21661 }
21662
21663 if (context_terminator(parser->current_context->context, &parser->current)) {
21664 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
21665 if (
21666 !next_binding_powers.binary ||
21667 binding_power > next_binding_powers.left ||
21668 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
21669 ) {
21670 return node;
21671 }
21672 }
21673 }
21674
21675 return node;
21676}
21677
21682static pm_statements_node_t *
21683wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21684 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21685 if (statements == NULL) {
21686 statements = pm_statements_node_create(parser);
21687 }
21688
21689 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21690 pm_arguments_node_arguments_append(
21691 parser->arena,
21692 arguments,
21693 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21694 );
21695
21696 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21697 parser,
21698 arguments,
21699 pm_parser_constant_id_constant(parser, "print", 5)
21700 )), true);
21701 }
21702
21703 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21704 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21705 if (statements == NULL) {
21706 statements = pm_statements_node_create(parser);
21707 }
21708
21709 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21710 pm_arguments_node_arguments_append(
21711 parser->arena,
21712 arguments,
21713 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21714 );
21715
21716 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21717 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21718
21719 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21720 parser,
21721 pm_parser_constant_id_constant(parser, "$F", 2),
21722 UP(call)
21723 );
21724
21725 pm_statements_node_body_prepend(parser->arena, statements, UP(write));
21726 }
21727
21728 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21729 pm_arguments_node_arguments_append(
21730 parser->arena,
21731 arguments,
21732 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21733 );
21734
21735 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21736 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21737 pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
21738 parser,
21739 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21740 NULL,
21741 UP(pm_true_node_synthesized_create(parser))
21742 )));
21743
21744 pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
21745 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21746 }
21747
21748 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21749 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21750 parser,
21751 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21752 statements
21753 )), true);
21754
21755 statements = wrapped_statements;
21756 }
21757
21758 return statements;
21759}
21760
// Parse the top level of the source and return it as a program node.
//
// Pushes a top-level scope when none exists, lexes the first token, parses the
// statements with the PM_CONTEXT_MAIN context, orders the locals of the
// current scope, pops that scope, and hands locals and statements to
// pm_program_node_create.
21764static pm_node_t *
21765parse_program(pm_parser_t *parser) {
21766 // If the current scope is NULL, then we want to push a new top level scope.
21767 // The current scope could exist in the event that we are parsing an eval
21768 // and the user has passed in scopes that already exist.
21769 if (parser->current_scope == NULL) {
21770 pm_parser_scope_push(parser, true);
21771 }
21772
// Block exits are collected into a list local to this call while parsing.
21773 pm_node_list_t current_block_exits = { 0 };
21774 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21775
21776 parser_lex(parser);
21777 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21778
21779 if (statements != NULL && !parser->parsing_eval) {
21780 // If we have statements, then the top-level statement should be
21781 // explicitly checked as well. We have to do this here because
21782 // everywhere else we check all but the last statement.
21783 assert(statements->body.size > 0);
21784 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21785 }
21786
// The locals must be read out before the scope is popped.
21787 pm_constant_id_list_t locals;
21788 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21789 pm_parser_scope_pop(parser);
21790
21791 // At the top level, see if we need to wrap the statements in a program
21792 // node with a while loop based on the options.
// NOTE(review): the listing jumps from 21792 to 21794, so the `if (...) {`
// that pairs with the `} else {` below is not visible here. Its condition
// cannot be confirmed from this chunk.
21794 statements = wrap_statements(parser, statements);
21795 } else {
21796 flush_block_exits(parser, previous_block_exits);
21797 }
21798
21799 // If this is an empty file, then we're still going to parse all of the
21800 // statements in order to gather up all of the comments and such. Here we'll
21801 // correct the location information.
21802 if (statements == NULL) {
21803 statements = pm_statements_node_create(parser);
21804 statements->base.location = (pm_location_t) { 0 };
21805 }
21806
21807 return UP(pm_program_node_create(parser, &locals, statements));
21808}
21809
21810/******************************************************************************/
21811/* External functions */
21812/******************************************************************************/
21813
/**
 * Locate the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not have to be NUL-terminated;
 * no byte at or beyond `big + big_length` is read.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to look for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match inside `big`, or NULL when
 *     there is no match. An empty `little` matches at `big`.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very start without reading any bytes.
    if (little_length == 0) return big;

    // A needle longer than the window can never match. Returning here also
    // avoids computing a "last candidate" pointer that would lie before the
    // start of the buffer.
    if (little_length > big_length) return NULL;

    const char *last = big + (big_length - little_length);

    for (const char *cursor = big; cursor <= last; cursor++) {
        // Compare the first byte before paying for the full memcmp.
        if (*cursor == *little && memcmp(cursor, little, little_length) == 0) return cursor;
    }

    return NULL;
}
21833
21834#ifdef _WIN32
21835#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21836#else
21842static void
21843pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21844 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21845 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
21846 }
21847}
21848#endif
21849
21854static void
21855pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21856 const char *switches = pm_strnstr(engine, " -", length);
21857 if (switches == NULL) return;
21858
21859 pm_options_t next_options = *options;
21860 options->shebang_callback(
21861 &next_options,
21862 (const uint8_t *) (switches + 1),
21863 length - ((size_t) (switches - engine)) - 1,
21864 options->shebang_callback_data
21865 );
21866
21867 size_t encoding_length;
21868 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21869 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
21870 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21871 }
21872
21873 parser->command_line = next_options.command_line;
21874 parser->frozen_string_literal = next_options.frozen_string_literal;
21875}
21876
// Initialize `parser` to parse the `size` bytes starting at `source`.
//
// The parser struct is reset to its defaults, the constant pool and line
// offset list are sized from `size`, the given options (when not NULL) are
// copied onto the parser, a leading UTF-8 BOM is skipped, and shebang
// handling decides where parsing starts and where an encoding comment may
// begin.
//
// arena:   stored on the parser; must not be NULL (asserted).
// parser:  the struct to initialize; overwritten entirely.
// source:  the bytes to parse; must not be NULL (asserted).
// size:    the number of bytes in `source`.
// options: optional settings for this parse; may be NULL.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
21881pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
21882 assert(arena != NULL);
21883 assert(source != NULL);
21884
21885 *parser = (pm_parser_t) {
21886 .arena = arena,
21887 .node_id = 0,
21888 .lex_state = PM_LEX_STATE_BEG,
21889 .enclosure_nesting = 0,
21890 .lambda_enclosure_nesting = -1,
21891 .brace_nesting = 0,
21892 .do_loop_stack = 0,
21893 .accepts_block_stack = 0,
21894 .lex_modes = {
21895 .index = 0,
21896 .stack = {{ .mode = PM_LEX_DEFAULT }},
21897 .current = &parser->lex_modes.stack[0],
21898 },
21899 .start = source,
21900 .end = source + size,
21901 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21902 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21903 .next_start = NULL,
21904 .heredoc_end = NULL,
21905 .data_loc = { 0 },
21906 .comment_list = { 0 },
21907 .magic_comment_list = { 0 },
21908 .warning_list = { 0 },
21909 .error_list = { 0 },
21910 .current_scope = NULL,
21911 .current_context = NULL,
21912 .encoding = PM_ENCODING_UTF_8_ENTRY,
21913 .encoding_changed_callback = NULL,
21914 .encoding_comment_start = source,
21915 .lex_callback = NULL,
21916 .filepath = { 0 },
21917 .constant_pool = { 0 },
21918 .line_offsets = { 0 },
21919 .integer_base = 0,
21920 .current_string = PM_STRING_EMPTY,
21921 .start_line = 1,
21922 .explicit_encoding = NULL,
21923 .command_line = 0,
21924 .parsing_eval = false,
21925 .partial_script = false,
21926 .command_start = true,
21927 .recovering = false,
21928 .continuable = true,
21929 .encoding_locked = false,
21930 .encoding_changed = false,
21931 .pattern_matching_newlines = false,
21932 .in_keyword_arg = false,
21933 .current_block_exits = NULL,
21934 .semantic_token_seen = false,
21935 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
21936 .warn_mismatched_indentation = true
21937 };
21938
21939 // Initialize the constant pool. We're going to completely guess as to the
21940 // number of constants that we'll need based on the size of the input. The
21941 // ratio we chose here is actually less arbitrary than you might think.
21942 //
21943 // We took ~50K Ruby files and measured the size of the file versus the
21944 // number of constants that were found in those files. Then we found the
21945 // average and standard deviation of the ratios of constants/bytesize. Then
21946 // we added 1.34 standard deviations to the average to get a ratio that
21947 // would fit 75% of the files (for a two-tailed distribution). This works
21948 // because there was about a 0.77 correlation and the distribution was
21949 // roughly normal.
21950 //
21951 // This ratio will need to change if we add more constants to the constant
21952 // pool for another node type.
21953 uint32_t constant_size = ((uint32_t) size) / 95;
21954 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
21955
21956 // Initialize the newline list. Similar to the constant pool, we're going to
21957 // guess at the number of newlines that we'll need based on the size of the
21958 // input.
21959 size_t newline_size = size / 22;
21960 pm_line_offset_list_init(&parser->line_offsets, newline_size < 4 ? 4 : newline_size);
21961
21962 // If options were provided to this parse, establish them here.
21963 if (options != NULL) {
21964 // filepath option
21965 parser->filepath = options->filepath;
21966
21967 // line option
21968 parser->start_line = options->line;
21969
21970 // encoding option
21971 size_t encoding_length = pm_string_length(&options->encoding);
21972 if (encoding_length > 0) {
21973 const uint8_t *encoding_source = pm_string_source(&options->encoding);
21974 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21975 }
21976
21977 // encoding_locked option
21978 parser->encoding_locked = options->encoding_locked;
21979
21980 // frozen_string_literal option
// NOTE(review): the listing jumps from 21980 to 21982, so the statement
// that belongs to the comment above is not visible here.
21982
21983 // command_line option
21984 parser->command_line = options->command_line;
21985
21986 // version option
21987 parser->version = options->version;
21988
21989 // partial_script
21990 parser->partial_script = options->partial_script;
21991
21992 // scopes option
21993 parser->parsing_eval = options->scopes_count > 0;
21994 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
21995
// Push one parser scope per scope given in the options. Only the first
// one is pushed with `true` as its second argument.
21996 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
21997 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
21998 pm_parser_scope_push(parser, scope_index == 0);
21999
22000 // Scopes given from the outside are not allowed to have numbered
22001 // parameters.
22002 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22003
22004 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22005 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22006
22007 const uint8_t *source = pm_string_source(local);
22008 size_t length = pm_string_length(local);
22009
// The name is copied so that the parser gets its own buffer. If the
// allocation fails, this local is skipped without reporting an error.
22010 void *allocated = xmalloc(length);
22011 if (allocated == NULL) continue;
22012
22013 memcpy(allocated, source, length);
22014 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22015 }
22016 }
22017 }
22018
22019 // Now that we have established the user-provided options, check if
22020 // a version was given and parse as the latest version otherwise.
22021 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
// NOTE(review): the listing jumps from 22021 to 22023, so the body of
// this `if` is not visible here.
22023 }
22024
22025 pm_accepts_block_stack_push(parser, true);
22026
22027 // Skip past the UTF-8 BOM if it exists.
22028 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22029 parser->current.end += 3;
22030 parser->encoding_comment_start += 3;
22031
22032 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
// NOTE(review): the listing jumps from 22032 to 22034, so one statement
// ahead of the callback invocation is not visible here.
22034 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22035 }
22036 }
22037
22038 // If the -x command line flag is set, or the first shebang of the file does
22039 // not include "ruby", then we'll search for a shebang that does include
22040 // "ruby" and start parsing from there.
22041 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22042
22043 // If the first two bytes of the source are a shebang, then we will do a bit
22044 // of extra processing.
22045 //
22046 // First, we'll indicate that the encoding comment is at the end of the
22047 // shebang. This means that when a shebang is present the encoding comment
22048 // can begin on the second line.
22049 //
22050 // Second, we will check if the shebang includes "ruby". If it does, then we
22051 // will start parsing from there. We will also potentially warn the
22052 // user if there is a carriage return at the end of the shebang. We will
22053 // also potentially call the shebang callback if this is the main script to
22054 // allow the caller to parse the shebang and find any command-line options.
22055 // If the shebang does not include "ruby" and this is the main script being
22056 // parsed, then we will start searching the file for a shebang that does
22057 // contain "ruby" as if -x were passed on the command line.
22058 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22059 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22060
22061 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22062 const char *engine;
22063
// NOTE(review): `length` above is measured from parser->current.end,
// which is 3 bytes past parser->start when a BOM was skipped, but the
// search and the offsets below start at parser->start. Confirm that
// the two agree when a BOM precedes the shebang.
22064 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22065 if (newline != NULL) {
22066 parser->encoding_comment_start = newline + 1;
22067
22068 if (options == NULL || options->main_script) {
22069 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22070 }
22071 }
22072
22073 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22074 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22075 }
22076
22077 search_shebang = false;
22078 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22079 search_shebang = true;
22080 }
22081 }
22082
22083 // Here we're going to find the first shebang that includes "ruby" and start
22084 // parsing from there.
22085 if (search_shebang) {
22086 // If a shebang that includes "ruby" is not found, then we're going to add
22087 // a load error to the list of errors on the parser.
22088 bool found_shebang = false;
22089
22090 // This is going to point to the start of each line as we check it.
22091 // We'll maintain a moving window looking at each line as they come.
22092 const uint8_t *cursor = parser->start;
22093
22094 // The newline pointer points to the end of the current line that we're
22095 // considering. If it is NULL, then we're at the end of the file.
22096 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22097
// Each iteration records the offset following the current newline and
// then examines the line that starts there, so the first line of the
// source is never examined by this loop.
22098 while (newline != NULL) {
22099 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
22100
22101 cursor = newline + 1;
22102 newline = next_newline(cursor, parser->end - cursor);
22103
22104 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22105 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22106 const char *engine;
22107 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22108 found_shebang = true;
22109
22110 if (newline != NULL) {
22111 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22112 parser->encoding_comment_start = newline + 1;
22113 }
22114
22115 if (options != NULL && options->shebang_callback != NULL) {
22116 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22117 }
22118
22119 break;
22120 }
22121 }
22122 }
22123
// On success both tokens are reset to an empty EOF token at the start of
// the shebang line. Otherwise an error is recorded and the line offsets
// collected during the search are discarded.
22124 if (found_shebang) {
22125 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22126 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22127 } else {
22128 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22129 pm_line_offset_list_clear(&parser->line_offsets);
22130 }
22131 }
22132
22133 // The encoding comment can start after any amount of inline whitespace, so
22134 // here we'll advance it to the first non-inline-whitespace character so
22135 // that it is ready for future comparisons.
22136 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22137}
22138
22147
22151static inline void
22152pm_comment_list_free(pm_list_t *list) {
22153 pm_list_node_t *node, *next;
22154
22155 for (node = list->head; node != NULL; node = next) {
22156 next = node->next;
22157
22158 pm_comment_t *comment = (pm_comment_t *) node;
22159 xfree_sized(comment, sizeof(pm_comment_t));
22160 }
22161}
22162
22166static inline void
22167pm_magic_comment_list_free(pm_list_t *list) {
22168 pm_list_node_t *node, *next;
22169
22170 for (node = list->head; node != NULL; node = next) {
22171 next = node->next;
22172
22174 xfree_sized(magic_comment, sizeof(pm_magic_comment_t));
22175 }
22176}
22177
// NOTE(review): the signature of this definition is not visible in this
// listing; the body operates on a `parser` pointer and releases the memory
// it owns (presumably this is pm_parser_free — confirm against the header).
//
// Frees the filepath string, both diagnostic lists, both comment lists, the
// constant pool and the line offsets, then unwinds any remaining scopes and
// lex modes.
22183 pm_string_free(&parser->filepath);
22184 pm_diagnostic_list_free(&parser->error_list);
22185 pm_diagnostic_list_free(&parser->warning_list);
22186 pm_comment_list_free(&parser->comment_list);
22187 pm_magic_comment_list_free(&parser->magic_comment_list);
22188 pm_constant_pool_free(&parser->constant_pool);
22189 pm_line_offset_list_free(&parser->line_offsets);
22190
22191 while (parser->current_scope != NULL) {
22192 // Normally, popping the scope doesn't free the locals since it is
22193 // assumed that ownership has transferred to the AST. However if we have
22194 // scopes while we're freeing the parser, it's likely they came from
22195 // eval scopes and we need to free them explicitly here.
22196 pm_parser_scope_pop(parser);
22197 }
22198
// Pop lex modes until the index drops below PM_LEX_STACK_SIZE.
// NOTE(review): presumably entries at or above that index are allocated
// separately from the inline stack — confirm in lex_mode_pop.
22199 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22200 lex_mode_pop(parser);
22201 }
22202}
22203
22209static bool
22210pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
22211 switch (diag_id) {
22212 case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
22213 case PM_ERR_BEGIN_UPCASE_BRACE:
22214 case PM_ERR_CLASS_VARIABLE_BARE:
22215 case PM_ERR_END_UPCASE_BRACE:
22216 case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
22217 case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
22218 case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
22219 case PM_ERR_EXPRESSION_NOT_WRITABLE:
22220 case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
22221 case PM_ERR_FLOAT_PARSE:
22222 case PM_ERR_GLOBAL_VARIABLE_BARE:
22223 case PM_ERR_HASH_KEY:
22224 case PM_ERR_HEREDOC_IDENTIFIER:
22225 case PM_ERR_INSTANCE_VARIABLE_BARE:
22226 case PM_ERR_INVALID_BLOCK_EXIT:
22227 case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
22228 case PM_ERR_INVALID_FLOAT_EXPONENT:
22229 case PM_ERR_INVALID_NUMBER_BINARY:
22230 case PM_ERR_INVALID_NUMBER_DECIMAL:
22231 case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
22232 case PM_ERR_INVALID_NUMBER_OCTAL:
22233 case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
22234 case PM_ERR_NO_LOCAL_VARIABLE:
22235 case PM_ERR_PARAMETER_ORDER:
22236 case PM_ERR_STATEMENT_UNDEF:
22237 case PM_ERR_VOID_EXPRESSION:
22238 return true;
22239 default:
22240 return false;
22241 }
22242}
22243
22277static void
22278pm_parse_continuable(pm_parser_t *parser) {
22279 // If there are no errors then there is nothing to continue.
22280 if (parser->error_list.size == 0) {
22281 parser->continuable = false;
22282 return;
22283 }
22284
22285 if (!parser->continuable) return;
22286
22287 size_t source_length = (size_t) (parser->end - parser->start);
22288
22289 // First pass: check if there are any non-stray, non-fatal errors.
22290 bool has_non_stray_error = false;
22291 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22292 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
22293 has_non_stray_error = true;
22294 break;
22295 }
22296 }
22297
22298 // Second pass: check each error. We track the minimum source position
22299 // among non-stray, non-fatal errors seen so far in list order, which
22300 // lets us detect cascade stray tokens.
22301 size_t non_stray_min_start = SIZE_MAX;
22302
22303 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22304 size_t error_start = (size_t) error->location.start;
22305 size_t error_end = error_start + (size_t) error->location.length;
22306 bool at_eof = error_end >= source_length;
22307
22308 // Fatal errors are non-continuable unless they occur at EOF.
22309 if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
22310 parser->continuable = false;
22311 return;
22312 }
22313
22314 // Track non-stray, non-fatal error positions in list order.
22315 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
22316 error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
22317 if (error_start < non_stray_min_start) non_stray_min_start = error_start;
22318 continue;
22319 }
22320
22321 // This is a stray token. Determine if it is a cascade effect
22322 // of a preceding error or genuinely stray.
22323
22324 // Rule (a): a non-stray error was seen earlier in the list at a
22325 // strictly earlier position — this stray is a cascade effect.
22326 if (non_stray_min_start < error_start) continue;
22327
22328 // Rule (b): this stray is at EOF with valid code before it.
22329 // Single-byte stray tokens at EOF (like `\` for line continuation)
22330 // are likely truncated tokens. Multi-byte stray tokens (like the
22331 // keyword `end`) need additional evidence that they are cascade
22332 // effects (i.e. non-stray errors exist elsewhere).
22333 if (at_eof && error_start > 0) {
22334 // Exception: closing delimiters at EOF are genuinely stray.
22335 if (error->location.length == 1) {
22336 const uint8_t *byte = parser->start + error_start;
22337 if (*byte == ')' || *byte == ']' || *byte == '}') {
22338 parser->continuable = false;
22339 return;
22340 }
22341
22342 // Single-byte non-delimiter stray at EOF: cascade.
22343 continue;
22344 }
22345
22346 // Multi-byte stray at EOF: cascade only if there are
22347 // non-stray errors (evidence of a preceding parse failure).
22348 if (has_non_stray_error) continue;
22349 }
22350
22351 // Rule (c): a stray `=` at the start of a line could be the
22352 // beginning of an embedded document (`=begin`). The remaining
22353 // bytes after `=` parse as an identifier, so the error is not
22354 // at EOF, but the construct is genuinely incomplete.
22355 if (error->location.length == 1) {
22356 const uint8_t *byte = parser->start + error_start;
22357 if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
22358 }
22359
22360 // This stray token is genuinely non-continuable.
22361 parser->continuable = false;
22362 return;
22363 }
22364}
22365
// NOTE(review): the signature of this definition is not visible in this
// listing; callers in this file invoke it as pm_parse(parser).
//
// Parse the program, then update parser->continuable from the recorded errors
// before returning the root node.
22371 pm_node_t *node = parse_program(parser);
22372 pm_parse_continuable(parser);
22373 return node;
22374}
22375
22381static bool
22382pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22383#define LINE_SIZE 4096
22384 char line[LINE_SIZE];
22385
22386 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22387 size_t length = LINE_SIZE;
22388 while (length > 0 && line[length - 1] == '\n') length--;
22389
22390 if (length == LINE_SIZE) {
22391 // If we read a line that is the maximum size and it doesn't end
22392 // with a newline, then we'll just append it to the buffer and
22393 // continue reading.
22394 length--;
22395 pm_buffer_append_string(buffer, line, length);
22396 continue;
22397 }
22398
22399 // Append the line to the buffer.
22400 length--;
22401 pm_buffer_append_string(buffer, line, length);
22402
22403 // Check if the line matches the __END__ marker. If it does, then stop
22404 // reading and return false. In most circumstances, this means we should
22405 // stop reading from the stream so that the DATA constant can pick it
22406 // up.
22407 switch (length) {
22408 case 7:
22409 if (strncmp(line, "__END__", 7) == 0) return false;
22410 break;
22411 case 8:
22412 if (strncmp(line, "__END__\n", 8) == 0) return false;
22413 break;
22414 case 9:
22415 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22416 break;
22417 }
22418
22419 // All data should be read via gets. If the string returned by gets
22420 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22421 if (stream_feof(stream)) {
22422 break;
22423 }
22424 }
22425
22426 return true;
22427#undef LINE_SIZE
22428}
22429
// Parse source that is read incrementally from a stream.
//
// The buffer is initialized here and filled by pm_parse_stream_read; the
// parser is initialized over the buffer contents and the result is parsed.
// Returns the root node of the last parse.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22437pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22438 pm_buffer_init(buffer);
22439
// `eof` is false only when the reader stopped at an `__END__` line.
22440 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22441
22442 pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22443 pm_node_t *node = pm_parse(parser);
22444
// While the reader stopped early and the parse has errors, read further,
// throw away the previous parser and arena contents, and parse the whole
// buffer again from the start.
22445 while (!eof && parser->error_list.size > 0) {
22446 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22447
22448 pm_parser_free(parser);
22449 pm_arena_free(arena);
22450 pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22451 node = pm_parse(parser);
22452 }
22453
22454 return node;
22455}
22456
// Parse the given source with options read from `data` and report whether the
// parse produced no errors. Everything allocated along the way is released
// before returning.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing; the value returned is a bool.
22461pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22462 pm_options_t options = { 0 };
22463 pm_options_read(&options, data);
22464
22465 pm_arena_t arena = { 0 };
22466 pm_parser_t parser;
22467 pm_parser_init(&arena, &parser, source, size, &options);
22468
// The resulting node is not needed; only the error list is consulted.
22469 pm_parse(&parser);
22470
22471 bool result = parser.error_list.size == 0;
22472 pm_parser_free(&parser);
22473 pm_arena_free(&arena);
22474 pm_options_free(&options);
22475
22476 return result;
22477}
22478
22479#undef PM_CASE_KEYWORD
22480#undef PM_CASE_OPERATOR
22481#undef PM_CASE_WRITABLE
22482#undef PM_STRING_EMPTY
22483
22484// We optionally support serializing to a binary string. For systems that don't
22485// want or need this functionality, it can be turned off with the
22486// PRISM_EXCLUDE_SERIALIZATION define.
22487#ifndef PRISM_EXCLUDE_SERIALIZATION
22488
22489static inline void
22490pm_serialize_header(pm_buffer_t *buffer) {
22491 pm_buffer_append_string(buffer, "PRISM", 5);
22492 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22493 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22494 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22495 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22496}
22497
// Serialize the given node into the buffer: the header first, then the
// content for the parser and node, then a single trailing NUL byte.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22502pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22503 pm_serialize_header(buffer);
22504 pm_serialize_content(parser, node, buffer);
22505 pm_buffer_append_byte(buffer, '\0');
22506}
22507
// Parse the given source with options read from `data` and append the
// serialized result (header, content, trailing NUL byte) to the buffer. The
// parser, arena and options created here are released before returning.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22513pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22514 pm_options_t options = { 0 };
22515 pm_options_read(&options, data);
22516
22517 pm_arena_t arena = { 0 };
22518 pm_parser_t parser;
22519 pm_parser_init(&arena, &parser, source, size, &options);
22520
22521 pm_node_t *node = pm_parse(&parser);
22522
// Same three steps as pm_serialize.
22523 pm_serialize_header(buffer);
22524 pm_serialize_content(&parser, node, buffer);
22525 pm_buffer_append_byte(buffer, '\0');
22526
22527 pm_parser_free(&parser);
22528 pm_arena_free(&arena);
22529 pm_options_free(&options);
22530}
22531
// Parse source read from a stream, with options read from `data`, and append
// the serialized result (header, content, trailing NUL byte) to `buffer`. The
// source bytes are gathered in a separate local buffer, which is released
// together with the parser, arena and options before returning.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22537pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22538 pm_arena_t arena = { 0 };
22539 pm_parser_t parser;
22540 pm_options_t options = { 0 };
22541 pm_options_read(&options, data);
22542
// parser_buffer is initialized inside pm_parse_stream.
22543 pm_buffer_t parser_buffer;
22544 pm_node_t *node = pm_parse_stream(&arena, &parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22545 pm_serialize_header(buffer);
22546 pm_serialize_content(&parser, node, buffer);
22547 pm_buffer_append_byte(buffer, '\0');
22548
22549 pm_buffer_free(&parser_buffer);
22550 pm_parser_free(&parser);
22551 pm_arena_free(&arena);
22552 pm_options_free(&options);
22553}
22554
// Parse the given source with options read from `data` and append only the
// comment information to the buffer: the header, the parser's encoding, its
// start line, and its comment list. No trailing NUL byte is appended here.
// The parser, arena and options created here are released before returning.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22559pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22560 pm_options_t options = { 0 };
22561 pm_options_read(&options, data);
22562
22563 pm_arena_t arena = { 0 };
22564 pm_parser_t parser;
22565 pm_parser_init(&arena, &parser, source, size, &options);
22566
// The resulting node is not needed; parsing is what fills the comment list.
22567 pm_parse(&parser);
22568 pm_serialize_header(buffer);
22569 pm_serialize_encoding(parser.encoding, buffer);
22570 pm_buffer_append_varsint(buffer, parser.start_line);
22571 pm_serialize_comment_list(&parser.comment_list, buffer);
22572
22573 pm_parser_free(&parser);
22574 pm_arena_free(&arena);
22575 pm_options_free(&options);
22576}
22577
22578#endif
22579
22580/******************************************************************************/
22581/* Slice queries for the Ruby API */
22582/******************************************************************************/
22583
// The kinds of identifier that pm_slice_type can report for a slice of source.
22585typedef enum {
// The encoding name given to pm_slice_type was not recognized.
22587 PM_SLICE_TYPE_ERROR = -1,
22588
// The slice is empty or is not a valid identifier.
22590 PM_SLICE_TYPE_NONE,
22591
// The slice is an identifier whose first character is not uppercase.
22593 PM_SLICE_TYPE_LOCAL,
22594
// The slice is an identifier whose first character is uppercase.
22596 PM_SLICE_TYPE_CONSTANT,
22597
// The slice is an identifier followed by a single `!`, `?` or `=`.
22599 PM_SLICE_TYPE_METHOD_NAME
22600} pm_slice_type_t;
22601
22605pm_slice_type_t
22606pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22607 // first, get the right encoding object
22608 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22609 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22610
22611 // check that there is at least one character
22612 if (length == 0) return PM_SLICE_TYPE_NONE;
22613
22614 size_t width;
22615 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22616 // valid because alphabetical
22617 } else if (*source == '_') {
22618 // valid because underscore
22619 width = 1;
22620 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22621 // valid because multibyte
22622 } else {
22623 // invalid because no match
22624 return PM_SLICE_TYPE_NONE;
22625 }
22626
22627 // determine the type of the slice based on the first character
22628 const uint8_t *end = source + length;
22629 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22630
22631 // next, iterate through all of the bytes of the string to ensure that they
22632 // are all valid identifier characters
22633 source += width;
22634
22635 while (source < end) {
22636 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22637 // valid because alphanumeric
22638 source += width;
22639 } else if (*source == '_') {
22640 // valid because underscore
22641 source++;
22642 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22643 // valid because multibyte
22644 source += width;
22645 } else {
22646 // invalid because no match
22647 break;
22648 }
22649 }
22650
22651 // accept a ! or ? at the end of the slice as a method name
22652 if (*source == '!' || *source == '?' || *source == '=') {
22653 source++;
22654 result = PM_SLICE_TYPE_METHOD_NAME;
22655 }
22656
22657 // valid if we are at the end of the slice
22658 return source == end ? result : PM_SLICE_TYPE_NONE;
22659}
22660
// Report whether the slice is a valid local variable name in the given
// encoding: PM_STRING_QUERY_ERROR when the encoding is not recognized,
// PM_STRING_QUERY_TRUE when pm_slice_type classifies the slice as a local,
// and PM_STRING_QUERY_FALSE for every other classification.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22665pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22666 switch (pm_slice_type(source, length, encoding_name)) {
22667 case PM_SLICE_TYPE_ERROR:
22668 return PM_STRING_QUERY_ERROR;
22669 case PM_SLICE_TYPE_NONE:
22670 case PM_SLICE_TYPE_CONSTANT:
22671 case PM_SLICE_TYPE_METHOD_NAME:
22672 return PM_STRING_QUERY_FALSE;
22673 case PM_SLICE_TYPE_LOCAL:
22674 return PM_STRING_QUERY_TRUE;
22675 }
22676
// Every enumerator is handled above, so control should never get here.
22677 assert(false && "unreachable");
22678 return PM_STRING_QUERY_FALSE;
22679}
22680
// Report whether the slice is a valid constant name in the given encoding:
// PM_STRING_QUERY_ERROR when the encoding is not recognized,
// PM_STRING_QUERY_TRUE when pm_slice_type classifies the slice as a constant,
// and PM_STRING_QUERY_FALSE for every other classification.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22685pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22686 switch (pm_slice_type(source, length, encoding_name)) {
22687 case PM_SLICE_TYPE_ERROR:
22688 return PM_STRING_QUERY_ERROR;
22689 case PM_SLICE_TYPE_NONE:
22690 case PM_SLICE_TYPE_LOCAL:
22691 case PM_SLICE_TYPE_METHOD_NAME:
22692 return PM_STRING_QUERY_FALSE;
22693 case PM_SLICE_TYPE_CONSTANT:
22694 return PM_STRING_QUERY_TRUE;
22695 }
22696
// Every enumerator is handled above, so control should never get here.
22697 assert(false && "unreachable");
22698 return PM_STRING_QUERY_FALSE;
22699}
22700
// Report whether the slice is a valid method name in the given encoding.
//
// Identifier-shaped slices are judged from their pm_slice_type
// classification. Anything else is compared against a fixed set of one-, two-
// and three-byte operator names. Returns PM_STRING_QUERY_ERROR when the
// encoding is not recognized, otherwise PM_STRING_QUERY_TRUE or
// PM_STRING_QUERY_FALSE.
//
// NOTE(review): the line carrying the return type of this definition is not
// visible in this listing.
22705pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
// B maps a boolean onto the query result. C1/C2/C3 compare the first one,
// two or three bytes of the slice against a literal.
22706#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22707#define C1(c) (*source == c)
22708#define C2(s) (memcmp(source, s, 2) == 0)
22709#define C3(s) (memcmp(source, s, 3) == 0)
22710
22711 switch (pm_slice_type(source, length, encoding_name)) {
22712 case PM_SLICE_TYPE_ERROR:
22713 return PM_STRING_QUERY_ERROR;
22714 case PM_SLICE_TYPE_NONE:
// Not an identifier: fall through to the operator checks below.
22715 break;
22716 case PM_SLICE_TYPE_LOCAL:
22717 // numbered parameters are not valid method names
// That is: reject exactly the two-byte names `_1` through `_9`.
22718 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22719 case PM_SLICE_TYPE_CONSTANT:
22720 // all constants are valid method names
22721 case PM_SLICE_TYPE_METHOD_NAME:
22722 // all method names are valid method names
22723 return PM_STRING_QUERY_TRUE;
22724 }
22725
// Switching on the length first guarantees that C2 and C3 never read more
// bytes than the slice holds.
22726 switch (length) {
22727 case 1:
22728 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22729 case 2:
22730 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22731 case 3:
22732 return B(C3("===") || C3("<=>") || C3("[]="));
22733 default:
22734 return PM_STRING_QUERY_FALSE;
22735 }
22736
22737#undef B
22738#undef C1
22739#undef C2
22740#undef C3
22741}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:218
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:202
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:182
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:228
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:234
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:107
@ PM_OPTIONS_VERSION_CRUBY_4_1
The vendored version of prism in CRuby 4.1.x.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:89
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:95
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:101
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:80
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:263
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:268
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:47
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:70
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:494
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:275
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:325
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:352
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:337
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:349
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:361
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:310
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:307
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:388
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:340
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:313
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:301
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:382
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:418
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:433
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:406
@ PM_CONTEXT_IF
an if statement
Definition parser.h:364
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:400
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:379
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:286
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:277
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:322
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:373
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:298
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:319
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:367
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:394
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:403
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:292
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:304
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:331
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:427
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:412
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:343
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:391
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:289
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:436
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:334
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:358
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:409
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:283
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:421
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:346
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:376
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:316
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:385
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:370
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:280
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:415
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:355
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:430
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:328
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:424
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:397
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:295
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:439
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:564
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:520
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:452
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:356
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:352
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:360
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:368
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition defines.h:303
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:274
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:258
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22370
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22144
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:21881
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22182
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22437
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:272
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:280
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:274
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:277
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2128
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:109
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2104
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2034
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:116
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:360
AndNode.
Definition ast.h:1280
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1295
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1308
A bump allocator.
Definition pm_arena.h:39
ArgumentsNode.
Definition ast.h:1340
pm_node_t base
The embedded base node.
Definition ast.h:1342
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1352
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1659
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1670
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1673
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1661
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1664
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1667
ArrayNode.
Definition ast.h:1370
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1379
ArrayPatternNode.
Definition ast.h:1430
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1448
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1488
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1498
AssocNode.
Definition ast.h:1513
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1544
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1531
BeginNode.
Definition ast.h:1636
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1678
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1688
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1658
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1668
pm_node_t base
The embedded base node.
Definition ast.h:1638
This struct represents a set of binding powers used for a given token.
Definition prism.c:12074
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12082
pm_binding_power_t left
The left binding power.
Definition prism.c:12076
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:12088
pm_binding_power_t right
The right binding power.
Definition prism.c:12079
BlockLocalVariableNode.
Definition ast.h:1753
BlockNode.
Definition ast.h:1780
BlockParametersNode.
Definition ast.h:1908
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2132
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2193
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2213
pm_constant_id_t name
CallNode::name.
Definition ast.h:2173
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2203
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2226
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2163
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2183
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2236
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2150
CaseMatchNode.
Definition ast.h:2567
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2589
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2599
CaseNode.
Definition ast.h:2636
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2668
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2658
ClassVariableReadNode.
Definition ast.h:2925
ClassVariableTargetNode.
Definition ast.h:2953
ClassVariableWriteNode.
Definition ast.h:2975
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:462
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:470
pm_location_t location
The location of the comment in the source.
Definition parser.h:467
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3184
ConstantPathTargetNode.
Definition ast.h:3319
ConstantReadNode.
Definition ast.h:3412
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3440
ConstantWriteNode.
Definition ast.h:3462
This is a node in a linked list of contexts.
Definition parser.h:443
pm_context_t context
The context that this node represents.
Definition parser.h:445
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:448
DefNode.
Definition ast.h:3524
pm_location_t equal_loc
DefNode::equal_loc.
Definition ast.h:3581
PM_NODE_ALIGNAS struct pm_node * body
DefNode::body.
Definition ast.h:3551
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:369
ElseNode.
Definition ast.h:3638
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3650
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3733
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3745
FindPatternNode.
Definition ast.h:3791
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3855
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3803
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3868
FlipFlopNode.
Definition ast.h:3886
FloatNode.
Definition ast.h:3918
double value
FloatNode::value.
Definition ast.h:3927
pm_node_t base
The embedded base node.
Definition ast.h:3920
ForwardingParameterNode.
Definition ast.h:4051
GlobalVariableReadNode.
Definition ast.h:4213
GlobalVariableTargetNode.
Definition ast.h:4241
GlobalVariableWriteNode.
Definition ast.h:4263
HashNode.
Definition ast.h:4324
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4349
HashPatternNode.
Definition ast.h:4383
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4398
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4437
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4450
All of the information that needs to be stored in order to lex a heredoc.
Definition parser.h:89
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:94
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:97
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:100
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:91
IfNode.
Definition ast.h:4471
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4530
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4549
ImaginaryNode.
Definition ast.h:4576
InNode.
Definition ast.h:4652
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4664
InstanceVariableReadNode.
Definition ast.h:5055
InstanceVariableTargetNode.
Definition ast.h:5083
InstanceVariableWriteNode.
Definition ast.h:5105
IntegerNode.
Definition ast.h:5172
pm_integer_t value
IntegerNode::value.
Definition ast.h:5181
pm_node_t base
The embedded base node.
Definition ast.h:5174
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5209
InterpolatedRegularExpressionNode.
Definition ast.h:5254
InterpolatedStringNode.
Definition ast.h:5290
pm_node_t base
The embedded base node.
Definition ast.h:5292
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5297
InterpolatedSymbolNode.
Definition ast.h:5322
InterpolatedXStringNode.
Definition ast.h:5354
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5361
pm_node_t base
The embedded base node.
Definition ast.h:5356
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5366
KeywordHashNode.
Definition ast.h:5423
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:516
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:510
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:110
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:166
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:154
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:157
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:163
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:172
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:234
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:250
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:255
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:209
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:240
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:247
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:530
pm_constant_id_t name
The name of the local variable.
Definition parser.h:532
pm_location_t location
The location of the local variable in the source.
Definition parser.h:535
uint32_t hash
The hash of the local variable.
Definition parser.h:544
uint32_t index
The index of the local variable in the local table.
Definition parser.h:538
uint32_t reads
The number of times the local variable is read.
Definition parser.h:541
LocalVariableReadNode.
Definition ast.h:5659
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5689
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5676
LocalVariableTargetNode.
Definition ast.h:5707
LocalVariableWriteNode.
Definition ast.h:5734
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5760
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5747
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:552
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:560
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:557
uint32_t size
The number of local variables in the set.
Definition parser.h:554
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:543
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:545
uint32_t length
The length of the location.
Definition ast.h:548
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:479
MatchLastLineNode.
Definition ast.h:5825
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5992
MissingNode.
Definition ast.h:6004
MultiTargetNode.
Definition ast.h:6073
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6130
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6090
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6140
MultiWriteNode.
Definition ast.h:6155
A list of nodes in the source, most often used for lists of children.
Definition ast.h:556
size_t size
The number of nodes in the list.
Definition ast.h:558
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:564
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1054
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1059
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1077
OptionalParameterNode.
Definition ast.h:6449
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:113
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:162
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:124
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:178
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:185
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:139
int32_t line
The line within the file that the parse starts on.
Definition options.h:133
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:118
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:171
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:195
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:144
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:127
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:159
OrNode.
Definition ast.h:6486
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6514
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6501
ParametersNode.
Definition ast.h:6540
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6577
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6557
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6572
ParenthesesNode.
Definition ast.h:6595
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6602
This struct represents the overall parser.
Definition parser.h:638
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:841
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:650
uint8_t command_line
The command line flags given from the options.
Definition parser.h:860
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:756
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:883
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:925
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:695
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:896
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:798
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:940
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:787
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:928
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:708
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:750
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:722
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:659
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:775
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:857
pm_token_t previous
The previous token we were considering.
Definition parser.h:698
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:804
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:876
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:919
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:729
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:741
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:692
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:653
pm_line_offset_list_t line_offsets
This is the list of line offsets in the source file.
Definition parser.h:790
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:735
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:870
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:854
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:769
bool continuable
Whether or not the source being parsed could become valid if more input were appended.
Definition parser.h:904
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:685
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:732
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:716
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:665
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:763
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:810
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:912
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:682
pm_arena_t * arena
The arena used for all AST-lifetime allocations.
Definition parser.h:640
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:719
size_t index
The current index into the lexer mode stack.
Definition parser.h:688
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:781
pm_scope_t * current_scope
The current local scope.
Definition parser.h:738
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:886
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:934
RangeNode.
Definition ast.h:6825
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6854
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6840
RationalNode.
Definition ast.h:6882
pm_node_t base
The embedded base node.
Definition ast.h:6884
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6893
Accumulation state for named capture groups found during regexp parsing.
Definition regexp.h:25
pm_call_node_t * call
The call node wrapping the regular expression node (for =~).
Definition regexp.h:27
pm_constant_id_list_t names
The list of capture names found so far (for deduplication).
Definition regexp.h:33
pm_match_write_node_t * match
The match write node being built, or NULL if no captures found yet.
Definition regexp.h:30
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9371
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9376
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9373
RegularExpressionNode.
Definition ast.h:6947
RequiredParameterNode.
Definition ast.h:7019
RescueModifierNode.
Definition ast.h:7041
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7058
RescueNode.
Definition ast.h:7078
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7115
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7105
This struct represents a node in a linked list of scopes.
Definition parser.h:578
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:580
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:591
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:618
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:583
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:612
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:624
SplatNode.
Definition ast.h:7368
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7380
StatementsNode.
Definition ast.h:7395
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7402
pm_node_t base
The embedded base node.
Definition ast.h:7397
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7429
pm_node_t base
The embedded base node.
Definition ast.h:7431
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7451
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7441
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7446
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7436
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@105 type
The type of the string.
SuperNode.
Definition ast.h:7471
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
SuperNode::arguments.
Definition ast.h:7490
pm_location_t lparen_loc
SuperNode::lparen_loc.
Definition ast.h:7483
PM_NODE_ALIGNAS struct pm_node * block
SuperNode::block.
Definition ast.h:7500
SymbolNode.
Definition ast.h:7523
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7535
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7545
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9345
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9350
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9356
This struct represents a token in the Ruby source.
Definition ast.h:524
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:532
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:529
pm_token_type_t type
The type of the token.
Definition ast.h:526
UndefNode.
Definition ast.h:7577
UnlessNode.
Definition ast.h:7607
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7656
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7666
WhenNode.
Definition ast.h:7741
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7763
XStringNode.
Definition ast.h:7830
YieldNode.
Definition ast.h:7867
pm_location_t lparen_loc
YieldNode::lparen_loc.
Definition ast.h:7879
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
YieldNode::arguments.
Definition ast.h:7884