Ruby 4.1.0dev (2026-03-06 revision a6cb8f0774987082c217b185a50cd474cb38672d)
prism.c
1#include "prism.h"
2#include "prism/node_new.h"
3
7const char *
8pm_version(void) {
9 return PRISM_VERSION;
10}
11
/**
 * Column width attributed to a tab character.
 * NOTE(review): the code that consumes this constant is outside this chunk.
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Macros for min/max. Both evaluate each argument twice, so do not pass
// expressions with side effects.
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
21
/******************************************************************************/
/* Helpful AST-related macros                                                 */
/******************************************************************************/
25
/** Cast a value to uint32_t. */
#define U32(value_) ((uint32_t) (value_))

/** Short aliases for PM_NODE_FLAGS and PM_NODE_UPCAST. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

/** Start and end offsets of a location, which is a (start, length) pair. */
#define PM_LOCATION_START(location_) ((location_)->start)
#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)

/**
 * Token offsets are computed relative to the parser's start pointer; token
 * lengths are pointer differences. All results are narrowed to uint32_t.
 */
#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)

/** Start, length, and end of a node, read from its location after upcasting. */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))

/** Lengths of spans that begin at a token and end at a node, or vice versa. */
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))

/**
 * Setters for a node's start and length. The length setters compute the new
 * length from the node's current start, so set the start first when both
 * change.
 */
#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))

/** Build pm_location_t values from raw offsets, a token, or a node. */
#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
#define PM_LOCATION_INIT_NODE(node_) (UP(node_)->location)

/** Build pm_location_t values spanning from a left element to a right element. */
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

/**
 * TOK2LOC converts a token pointer to a location. NTOK2LOC does the same but
 * yields the unset location for a NULL token pointer. NTOK2PTR takes a token
 * value and yields NULL when its start pointer is NULL, otherwise its address.
 */
#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
66
/******************************************************************************/
/* Lex mode manipulations                                                     */
/******************************************************************************/
70
/**
 * Return the character that increases the nesting level for a delimited
 * literal that opens with `start`. For the four bracket-like openers this is
 * the opener itself; for every other byte there is no incrementor and '\0' is
 * returned.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    switch (start) {
        case '(':
        case '[':
        case '{':
        case '<':
            return start;
        default:
            return '\0';
    }
}
87
/**
 * Return the character that closes a delimited literal that opens with
 * `start`. Bracket-like openers map to their matching closer; any other byte
 * terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    switch (start) {
        case '(':
            return ')';
        case '[':
            return ']';
        case '{':
            return '}';
        case '<':
            return '>';
        default:
            return start;
    }
}
107
113static bool
114lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
115 lex_mode.prev = parser->lex_modes.current;
116 parser->lex_modes.index++;
117
118 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
119 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
120 if (parser->lex_modes.current == NULL) return false;
121
122 *parser->lex_modes.current = lex_mode;
123 } else {
124 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
125 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
126 }
127
128 return true;
129}
130
134static inline bool
135lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
136 uint8_t incrementor = lex_mode_incrementor(delimiter);
137 uint8_t terminator = lex_mode_terminator(delimiter);
138
139 pm_lex_mode_t lex_mode = {
140 .mode = PM_LEX_LIST,
141 .as.list = {
142 .nesting = 0,
143 .interpolation = interpolation,
144 .incrementor = incrementor,
145 .terminator = terminator
146 }
147 };
148
149 // These are the places where we need to split up the content of the list.
150 // We'll use strpbrk to find the first of these characters.
151 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
152 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
153 size_t index = 7;
154
155 // Now we'll add the terminator to the list of breakpoints. If the
156 // terminator is not already a NULL byte, add it to the list.
157 if (terminator != '\0') {
158 breakpoints[index++] = terminator;
159 }
160
161 // If interpolation is allowed, then we're going to check for the #
162 // character. Otherwise we'll only look for escapes and the terminator.
163 if (interpolation) {
164 breakpoints[index++] = '#';
165 }
166
167 // If there is an incrementor, then we'll check for that as well.
168 if (incrementor != '\0') {
169 breakpoints[index++] = incrementor;
170 }
171
172 parser->explicit_encoding = NULL;
173 return lex_mode_push(parser, lex_mode);
174}
175
181static inline bool
182lex_mode_push_list_eof(pm_parser_t *parser) {
183 return lex_mode_push_list(parser, false, '\0');
184}
185
189static inline bool
190lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
191 pm_lex_mode_t lex_mode = {
192 .mode = PM_LEX_REGEXP,
193 .as.regexp = {
194 .nesting = 0,
195 .incrementor = incrementor,
196 .terminator = terminator
197 }
198 };
199
200 // These are the places where we need to split up the content of the
201 // regular expression. We'll use strpbrk to find the first of these
202 // characters.
203 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
204 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
205 size_t index = 4;
206
207 // First we'll add the terminator.
208 if (terminator != '\0') {
209 breakpoints[index++] = terminator;
210 }
211
212 // Next, if there is an incrementor, then we'll check for that as well.
213 if (incrementor != '\0') {
214 breakpoints[index++] = incrementor;
215 }
216
217 parser->explicit_encoding = NULL;
218 return lex_mode_push(parser, lex_mode);
219}
220
224static inline bool
225lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
226 pm_lex_mode_t lex_mode = {
227 .mode = PM_LEX_STRING,
228 .as.string = {
229 .nesting = 0,
230 .interpolation = interpolation,
231 .label_allowed = label_allowed,
232 .incrementor = incrementor,
233 .terminator = terminator
234 }
235 };
236
237 // These are the places where we need to split up the content of the
238 // string. We'll use strpbrk to find the first of these characters.
239 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
240 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
241 size_t index = 3;
242
243 // Now add in the terminator. If the terminator is not already a NULL byte,
244 // then we'll add it.
245 if (terminator != '\0') {
246 breakpoints[index++] = terminator;
247 }
248
249 // If interpolation is allowed, then we're going to check for the #
250 // character. Otherwise we'll only look for escapes and the terminator.
251 if (interpolation) {
252 breakpoints[index++] = '#';
253 }
254
255 // If we have an incrementor, then we'll add that in as a breakpoint as
256 // well.
257 if (incrementor != '\0') {
258 breakpoints[index++] = incrementor;
259 }
260
261 parser->explicit_encoding = NULL;
262 return lex_mode_push(parser, lex_mode);
263}
264
270static inline bool
271lex_mode_push_string_eof(pm_parser_t *parser) {
272 return lex_mode_push_string(parser, false, false, '\0', '\0');
273}
274
280static void
281lex_mode_pop(pm_parser_t *parser) {
282 if (parser->lex_modes.index == 0) {
283 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
284 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
285 parser->lex_modes.index--;
286 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
287 } else {
288 parser->lex_modes.index--;
289 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
290 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
291 parser->lex_modes.current = prev;
292 }
293}
294
298static inline bool
299lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
300 return parser->lex_state & state;
301}
302
/**
 * The result of lex_state_ignored_p: whether a newline should not be ignored,
 * should be ignored outright, or falls in the pattern case.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL,
    PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
308
309static inline pm_ignored_newline_type_t
310lex_state_ignored_p(pm_parser_t *parser) {
311 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
312
313 if (ignored) {
314 return PM_IGNORED_NEWLINE_ALL;
315 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
316 return PM_IGNORED_NEWLINE_PATTERN;
317 } else {
318 return PM_IGNORED_NEWLINE_NONE;
319 }
320}
321
322static inline bool
323lex_state_beg_p(pm_parser_t *parser) {
324 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
325}
326
327static inline bool
328lex_state_arg_p(pm_parser_t *parser) {
329 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
330}
331
332static inline bool
333lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
334 if (parser->current.end >= parser->end) {
335 return false;
336 }
337 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
338}
339
340static inline bool
341lex_state_end_p(pm_parser_t *parser) {
342 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
343}
344
348static inline bool
349lex_state_operator_p(pm_parser_t *parser) {
350 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
351}
352
357static inline void
358lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
359 parser->lex_state = state;
360}
361
#ifndef PM_DEBUG_LOGGING
/**
 * Controls the debug helpers below. Defaults to 0 (off) unless the build
 * defines it. When non-zero, calls to lex_state_set after this point are
 * routed through debug_lex_state_set.
 */
#define PM_DEBUG_LOGGING 0
#endif

#if PM_DEBUG_LOGGING
/**
 * Print the parser's lex state to stderr as "STATE: " followed by the names of
 * the set flags joined with '|', or "NONE" when the state equals
 * PM_LEX_STATE_NONE.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");
    bool first = true;

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

#define CHECK_STATE(state) \
    if (parser->lex_state & (state)) { \
        if (!first) fprintf(stderr, "|"); \
        fprintf(stderr, "%s", #state); \
        first = false; \
    }

    CHECK_STATE(PM_LEX_STATE_BEG)
    CHECK_STATE(PM_LEX_STATE_END)
    CHECK_STATE(PM_LEX_STATE_ENDARG)
    CHECK_STATE(PM_LEX_STATE_ENDFN)
    CHECK_STATE(PM_LEX_STATE_ARG)
    CHECK_STATE(PM_LEX_STATE_CMDARG)
    CHECK_STATE(PM_LEX_STATE_MID)
    CHECK_STATE(PM_LEX_STATE_FNAME)
    CHECK_STATE(PM_LEX_STATE_DOT)
    CHECK_STATE(PM_LEX_STATE_CLASS)
    CHECK_STATE(PM_LEX_STATE_LABEL)
    CHECK_STATE(PM_LEX_STATE_LABELED)
    CHECK_STATE(PM_LEX_STATE_FITEM)

#undef CHECK_STATE

    fprintf(stderr, "\n");
}

/**
 * Set the lex state, logging the caller, the previous state, and the new state
 * to stderr. This must be defined before the lex_state_set macro below so that
 * the call inside still reaches the real function.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);
    lex_state_set(parser, state);
    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

/** From here on, every lex_state_set call also logs its call site. */
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
419
/******************************************************************************/
/* Command-line macro helpers                                                 */
/******************************************************************************/
423
/** Yields a non-zero value if `option` is set in the parser's command_line bits. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
444
/******************************************************************************/
/* Diagnostic-related functions                                               */
/******************************************************************************/
448
452static inline void
453pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
454 pm_diagnostic_list_append(&parser->error_list, start, length, diag_id);
455}
456
461static inline void
462pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
463 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
464}
465
470static inline void
471pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
472 pm_parser_err_token(parser, &parser->current, diag_id);
473}
474
479static inline void
480pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
481 pm_parser_err_token(parser, &parser->previous, diag_id);
482}
483
488static inline void
489pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
490 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
491}
492
/**
 * Append a formatted error to the parser's error list for the given span. The
 * variadic arguments are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted error to the parser's error list whose span is the given
 * node's location.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted error for a node, passing the node's source text as the
 * format arguments (an int length followed by a pointer to the bytes).
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))

/**
 * Append a formatted error to the parser's error list whose span is the given
 * token.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted error for a token, passing the token's source text as the
 * format arguments (an int length followed by a pointer to the bytes).
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
526
530static inline void
531pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
532 pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id);
533}
534
539static inline void
540pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
541 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
542}
543
548static inline void
549pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
550 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
551}
552
/**
 * Append a formatted warning to the parser's warning list for the given span.
 * The variadic arguments are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning to the parser's warning list whose span is the
 * given token.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning for a token, passing the token's source text as
 * the format arguments (an int length followed by a pointer to the bytes).
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)

/**
 * Append a formatted warning to the parser's warning list whose span is the
 * given node's location.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
580
586static void
587pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
588 PM_PARSER_ERR_FORMAT(
589 parser,
590 U32(ident_start - parser->start),
591 U32(ident_length),
592 PM_ERR_HEREDOC_TERM,
593 (int) ident_length,
594 (const char *) ident_start
595 );
596}
597
/******************************************************************************/
/* Scope-related functions                                                    */
/******************************************************************************/
601
605static bool
606pm_parser_scope_push(pm_parser_t *parser, bool closed) {
607 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
608 if (scope == NULL) return false;
609
610 *scope = (pm_scope_t) {
611 .previous = parser->current_scope,
612 .locals = { 0 },
613 .parameters = PM_SCOPE_PARAMETERS_NONE,
614 .implicit_parameters = { 0 },
615 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
616 .closed = closed
617 };
618
619 parser->current_scope = scope;
620 return true;
621}
622
627static bool
628pm_parser_scope_toplevel_p(pm_parser_t *parser) {
629 pm_scope_t *scope = parser->current_scope;
630
631 do {
632 if (scope->previous == NULL) return true;
633 if (scope->closed) return false;
634 } while ((scope = scope->previous) != NULL);
635
636 assert(false && "unreachable");
637 return true;
638}
639
643static pm_scope_t *
644pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
645 pm_scope_t *scope = parser->current_scope;
646
647 while (depth-- > 0) {
648 assert(scope != NULL);
649 scope = scope->previous;
650 }
651
652 return scope;
653}
654
/**
 * The outcome of pm_parser_scope_forwarding_param_check: the forwarding
 * parameter is available (PASS), is available but also declared by an inner
 * non-closed scope (CONFLICT), or is not available (FAIL).
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
} pm_scope_forwarding_param_check_result_t;
660
661static pm_scope_forwarding_param_check_result_t
662pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
663 pm_scope_t *scope = parser->current_scope;
664 bool conflict = false;
665
666 while (scope != NULL) {
667 if (scope->parameters & mask) {
668 if (scope->closed) {
669 if (conflict) {
670 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
671 } else {
672 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
673 }
674 }
675
676 conflict = true;
677 }
678
679 if (scope->closed) break;
680 scope = scope->previous;
681 }
682
683 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
684}
685
686static void
687pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
688 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
689 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
690 // Pass.
691 break;
692 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
693 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
694 break;
695 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
696 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
697 break;
698 }
699}
700
701static void
702pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
703 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
704 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
705 // Pass.
706 break;
707 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
708 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
709 break;
710 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
711 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
712 break;
713 }
714}
715
716static void
717pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
718 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
719 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
720 // Pass.
721 break;
722 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
723 // This shouldn't happen, because ... is not allowed in the
724 // declaration of blocks. If we get here, we assume we already have
725 // an error for this.
726 break;
727 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
728 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
729 break;
730 }
731}
732
733static void
734pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
735 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
736 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
737 // Pass.
738 break;
739 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
740 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
741 break;
742 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
743 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
744 break;
745 }
746}
747
752pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
753 return parser->current_scope->shareable_constant;
754}
755
760static void
761pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
762 pm_scope_t *scope = parser->current_scope;
763
764 do {
765 scope->shareable_constant = shareable_constant;
766 } while (!scope->closed && (scope = scope->previous) != NULL);
767}
768
/******************************************************************************/
/* Local variable-related functions                                           */
/******************************************************************************/
772
/**
 * The capacity at which a locals set switches from a linear list to a hash
 * table: sets with a capacity below this value are scanned linearly.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
777
778static void
779pm_locals_free(pm_locals_t *locals) {
780 if (locals->capacity > 0) {
781 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
782 }
783}
784
789static uint32_t
790pm_locals_hash(pm_constant_id_t name) {
791 name = ((name >> 16) ^ name) * 0x45d9f3b;
792 name = ((name >> 16) ^ name) * 0x45d9f3b;
793 name = (name >> 16) ^ name;
794 return name;
795}
796
801static void
802pm_locals_resize(pm_locals_t *locals) {
803 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
804 assert(next_capacity > locals->capacity);
805
806 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
807 if (next_locals == NULL) abort();
808
809 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
810 if (locals->size > 0) {
811 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
812 }
813 } else {
814 // If we just switched from a list to a hash, then we need to fill in
815 // the hash values of all of the locals.
816 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
817 uint32_t mask = next_capacity - 1;
818
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name != PM_CONSTANT_ID_UNSET) {
823 if (hash_needed) local->hash = pm_locals_hash(local->name);
824
825 uint32_t hash = local->hash;
826 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
827 next_locals[hash & mask] = *local;
828 }
829 }
830 }
831
832 pm_locals_free(locals);
833 locals->locals = next_locals;
834 locals->capacity = next_capacity;
835}
836
852static bool
853pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
854 if (locals->size >= (locals->capacity / 4 * 3)) {
855 pm_locals_resize(locals);
856 }
857
858 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
859 for (uint32_t index = 0; index < locals->capacity; index++) {
860 pm_local_t *local = &locals->locals[index];
861
862 if (local->name == PM_CONSTANT_ID_UNSET) {
863 *local = (pm_local_t) {
864 .name = name,
865 .location = { .start = start, .length = length },
866 .index = locals->size++,
867 .reads = reads,
868 .hash = 0
869 };
870 return true;
871 } else if (local->name == name) {
872 return false;
873 }
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 *local = (pm_local_t) {
885 .name = name,
886 .location = { .start = start, .length = length },
887 .index = locals->size++,
888 .reads = reads,
889 .hash = initial_hash
890 };
891 return true;
892 } else if (local->name == name) {
893 return false;
894 } else {
895 hash++;
896 }
897 } while ((hash & mask) != initial_hash);
898 }
899
900 assert(false && "unreachable");
901 return true;
902}
903
908static uint32_t
909pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
910 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
911 for (uint32_t index = 0; index < locals->size; index++) {
912 pm_local_t *local = &locals->locals[index];
913 if (local->name == name) return index;
914 }
915 } else {
916 uint32_t mask = locals->capacity - 1;
917 uint32_t hash = pm_locals_hash(name);
918 uint32_t initial_hash = hash & mask;
919
920 do {
921 pm_local_t *local = &locals->locals[hash & mask];
922
923 if (local->name == PM_CONSTANT_ID_UNSET) {
924 return UINT32_MAX;
925 } else if (local->name == name) {
926 return hash & mask;
927 } else {
928 hash++;
929 }
930 } while ((hash & mask) != initial_hash);
931 }
932
933 return UINT32_MAX;
934}
935
940static void
941pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
942 uint32_t index = pm_locals_find(locals, name);
943 assert(index != UINT32_MAX);
944
945 pm_local_t *local = &locals->locals[index];
946 assert(local->reads < UINT32_MAX);
947
948 local->reads++;
949}
950
955static void
956pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
957 uint32_t index = pm_locals_find(locals, name);
958 assert(index != UINT32_MAX);
959
960 pm_local_t *local = &locals->locals[index];
961 assert(local->reads > 0);
962
963 local->reads--;
964}
965
969static uint32_t
970pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
971 uint32_t index = pm_locals_find(locals, name);
972 assert(index != UINT32_MAX);
973
974 return locals->locals[index].reads;
975}
976
985static void
986pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
987 pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
988
989 // If we're still below the threshold for switching to a hash, then we only
990 // need to loop over the locals until we hit the size because the locals are
991 // stored in a list.
992 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
993
994 // We will only warn for unused variables if we're not at the top level, or
995 // if we're parsing a file outside of eval or -e.
996 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
997
998 for (uint32_t index = 0; index < capacity; index++) {
999 pm_local_t *local = &locals->locals[index];
1000
1001 if (local->name != PM_CONSTANT_ID_UNSET) {
1002 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1003
1004 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1005 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1006
1007 if (constant->length >= 1 && *constant->start != '_') {
1008 PM_PARSER_WARN_FORMAT(
1009 parser,
1010 local->location.start,
1011 local->location.length,
1012 PM_WARN_UNUSED_LOCAL_VARIABLE,
1013 (int) constant->length,
1014 (const char *) constant->start
1015 );
1016 }
1017 }
1018 }
1019 }
1020}
1021
/******************************************************************************/
/* Node-related functions                                                     */
/******************************************************************************/
1025
1029static inline pm_constant_id_t
1030pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1031 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1032}
1033
1037static inline pm_constant_id_t
1038pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1039 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1040}
1041
1045static inline pm_constant_id_t
1046pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1047 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1048}
1049
1053static inline pm_constant_id_t
1054pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1055 return pm_parser_constant_id_raw(parser, token->start, token->end);
1056}
1057
/**
 * A list of node types for use as a switch label, written as
 * `case PM_CASE_VOID_VALUE:`. The expansion supplies every `case` keyword
 * except the first and omits the final colon.
 */
#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
    case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
1064
1070static pm_node_t *
1071pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1072 pm_node_t *void_node = NULL;
1073
1074 while (node != NULL) {
1075 switch (PM_NODE_TYPE(node)) {
1076 case PM_CASE_VOID_VALUE:
1077 return void_node != NULL ? void_node : node;
1078 case PM_MATCH_PREDICATE_NODE:
1079 return NULL;
1080 case PM_BEGIN_NODE: {
1081 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1082
1083 if (cast->ensure_clause != NULL) {
1084 if (cast->rescue_clause != NULL) {
1085 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1086 if (vn != NULL) return vn;
1087 }
1088
1089 if (cast->statements != NULL) {
1090 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1091 if (vn != NULL) return vn;
1092 }
1093
1094 node = UP(cast->ensure_clause);
1095 } else if (cast->rescue_clause != NULL) {
1096 // https://bugs.ruby-lang.org/issues/21669
1097 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1098 if (cast->statements == NULL) return NULL;
1099
1100 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1101 if (vn == NULL) return NULL;
1102 if (void_node == NULL) void_node = vn;
1103 }
1104
1105 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1106 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1107
1108 if (vn == NULL) {
1109 // https://bugs.ruby-lang.org/issues/21669
1110 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1111 return NULL;
1112 }
1113 void_node = NULL;
1114 break;
1115 }
1116 }
1117
1118 if (cast->else_clause != NULL) {
1119 node = UP(cast->else_clause);
1120
1121 // https://bugs.ruby-lang.org/issues/21669
1122 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1123 pm_node_t *vn = pm_check_value_expression(parser, node);
1124 if (vn != NULL) return vn;
1125 }
1126 } else {
1127 return void_node;
1128 }
1129 } else {
1130 node = UP(cast->statements);
1131 }
1132
1133 break;
1134 }
1135 case PM_CASE_NODE: {
1136 // https://bugs.ruby-lang.org/issues/21669
1137 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1138 return NULL;
1139 }
1140
1141 pm_case_node_t *cast = (pm_case_node_t *) node;
1142 if (cast->else_clause == NULL) return NULL;
1143
1144 pm_node_t *condition;
1145 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1146 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1147
1148 pm_when_node_t *cast = (pm_when_node_t *) condition;
1149 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1150 if (vn == NULL) return NULL;
1151 if (void_node == NULL) void_node = vn;
1152 }
1153
1154 node = UP(cast->else_clause);
1155 break;
1156 }
1157 case PM_CASE_MATCH_NODE: {
1158 // https://bugs.ruby-lang.org/issues/21669
1159 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1160 return NULL;
1161 }
1162
1164 if (cast->else_clause == NULL) return NULL;
1165
1166 pm_node_t *condition;
1167 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1168 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1169
1170 pm_in_node_t *cast = (pm_in_node_t *) condition;
1171 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1172 if (vn == NULL) return NULL;
1173 if (void_node == NULL) void_node = vn;
1174 }
1175
1176 node = UP(cast->else_clause);
1177 break;
1178 }
1179 case PM_ENSURE_NODE: {
1180 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1181 node = UP(cast->statements);
1182 break;
1183 }
1184 case PM_PARENTHESES_NODE: {
1186 node = UP(cast->body);
1187 break;
1188 }
1189 case PM_STATEMENTS_NODE: {
1191
1192 // https://bugs.ruby-lang.org/issues/21669
1193 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1194 pm_node_t *body_part;
1195 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1196 switch (PM_NODE_TYPE(body_part)) {
1197 case PM_CASE_VOID_VALUE:
1198 if (void_node == NULL) {
1199 void_node = body_part;
1200 }
1201 return void_node;
1202 default: break;
1203 }
1204 }
1205 }
1206
1207 node = cast->body.nodes[cast->body.size - 1];
1208 break;
1209 }
1210 case PM_IF_NODE: {
1211 pm_if_node_t *cast = (pm_if_node_t *) node;
1212 if (cast->statements == NULL || cast->subsequent == NULL) {
1213 return NULL;
1214 }
1215 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1216 if (vn == NULL) {
1217 return NULL;
1218 }
1219 if (void_node == NULL) {
1220 void_node = vn;
1221 }
1222 node = cast->subsequent;
1223 break;
1224 }
1225 case PM_UNLESS_NODE: {
1226 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1227 if (cast->statements == NULL || cast->else_clause == NULL) {
1228 return NULL;
1229 }
1230 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1231 if (vn == NULL) {
1232 return NULL;
1233 }
1234 if (void_node == NULL) {
1235 void_node = vn;
1236 }
1237 node = UP(cast->else_clause);
1238 break;
1239 }
1240 case PM_ELSE_NODE: {
1241 pm_else_node_t *cast = (pm_else_node_t *) node;
1242 node = UP(cast->statements);
1243 break;
1244 }
1245 case PM_AND_NODE: {
1246 pm_and_node_t *cast = (pm_and_node_t *) node;
1247 node = cast->left;
1248 break;
1249 }
1250 case PM_OR_NODE: {
1251 pm_or_node_t *cast = (pm_or_node_t *) node;
1252 node = cast->left;
1253 break;
1254 }
1255 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1257
1258 pm_scope_t *scope = parser->current_scope;
1259 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1260
1261 pm_locals_read(&scope->locals, cast->name);
1262 return NULL;
1263 }
1264 default:
1265 return NULL;
1266 }
1267 }
1268
1269 return NULL;
1270}
1271
1272static inline void
1273pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1274 pm_node_t *void_node = pm_check_value_expression(parser, node);
1275 if (void_node != NULL) {
1276 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1277 }
1278}
1279
1283static void
1284pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1285 const char *type = NULL;
1286 int length = 0;
1287
1288 switch (PM_NODE_TYPE(node)) {
1289 case PM_BACK_REFERENCE_READ_NODE:
1290 case PM_CLASS_VARIABLE_READ_NODE:
1291 case PM_GLOBAL_VARIABLE_READ_NODE:
1292 case PM_INSTANCE_VARIABLE_READ_NODE:
1293 case PM_LOCAL_VARIABLE_READ_NODE:
1294 case PM_NUMBERED_REFERENCE_READ_NODE:
1295 type = "a variable";
1296 length = 10;
1297 break;
1298 case PM_CALL_NODE: {
1299 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1300 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1301
1302 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1303 switch (message->length) {
1304 case 1:
1305 switch (message->start[0]) {
1306 case '+':
1307 case '-':
1308 case '*':
1309 case '/':
1310 case '%':
1311 case '|':
1312 case '^':
1313 case '&':
1314 case '>':
1315 case '<':
1316 type = (const char *) message->start;
1317 length = 1;
1318 break;
1319 }
1320 break;
1321 case 2:
1322 switch (message->start[1]) {
1323 case '=':
1324 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1325 type = (const char *) message->start;
1326 length = 2;
1327 }
1328 break;
1329 case '@':
1330 if (message->start[0] == '+' || message->start[0] == '-') {
1331 type = (const char *) message->start;
1332 length = 2;
1333 }
1334 break;
1335 case '*':
1336 if (message->start[0] == '*') {
1337 type = (const char *) message->start;
1338 length = 2;
1339 }
1340 break;
1341 }
1342 break;
1343 case 3:
1344 if (memcmp(message->start, "<=>", 3) == 0) {
1345 type = "<=>";
1346 length = 3;
1347 }
1348 break;
1349 }
1350
1351 break;
1352 }
1353 case PM_CONSTANT_PATH_NODE:
1354 type = "::";
1355 length = 2;
1356 break;
1357 case PM_CONSTANT_READ_NODE:
1358 type = "a constant";
1359 length = 10;
1360 break;
1361 case PM_DEFINED_NODE:
1362 type = "defined?";
1363 length = 8;
1364 break;
1365 case PM_FALSE_NODE:
1366 type = "false";
1367 length = 5;
1368 break;
1369 case PM_FLOAT_NODE:
1370 case PM_IMAGINARY_NODE:
1371 case PM_INTEGER_NODE:
1372 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1373 case PM_INTERPOLATED_STRING_NODE:
1374 case PM_RATIONAL_NODE:
1375 case PM_REGULAR_EXPRESSION_NODE:
1376 case PM_SOURCE_ENCODING_NODE:
1377 case PM_SOURCE_FILE_NODE:
1378 case PM_SOURCE_LINE_NODE:
1379 case PM_STRING_NODE:
1380 case PM_SYMBOL_NODE:
1381 type = "a literal";
1382 length = 9;
1383 break;
1384 case PM_NIL_NODE:
1385 type = "nil";
1386 length = 3;
1387 break;
1388 case PM_RANGE_NODE: {
1389 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1390
1391 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1392 type = "...";
1393 length = 3;
1394 } else {
1395 type = "..";
1396 length = 2;
1397 }
1398
1399 break;
1400 }
1401 case PM_SELF_NODE:
1402 type = "self";
1403 length = 4;
1404 break;
1405 case PM_TRUE_NODE:
1406 type = "true";
1407 length = 4;
1408 break;
1409 default:
1410 break;
1411 }
1412
1413 if (type != NULL) {
1414 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1415 }
1416}
1417
1422static void
1423pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1424 assert(node->body.size > 0);
1425 const size_t size = node->body.size - (last_value ? 1 : 0);
1426 for (size_t index = 0; index < size; index++) {
1427 pm_void_statement_check(parser, node->body.nodes[index]);
1428 }
1429}
1430
/**
 * The context in which a predicate is being checked. It selects the wording
 * used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    /** Warnings are worded with "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Warnings are worded with "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1441
1445static void
1446pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1447 switch (type) {
1448 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1449 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1450 break;
1451 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1452 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1453 break;
1454 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1455 break;
1456 }
1457}
1458
1463static bool
1464pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1465 switch (PM_NODE_TYPE(node)) {
1466 case PM_ARRAY_NODE: {
1467 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1468
1469 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1470 for (size_t index = 0; index < cast->elements.size; index++) {
1471 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1472 }
1473
1474 return true;
1475 }
1476 case PM_HASH_NODE: {
1477 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1478
1479 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1480 for (size_t index = 0; index < cast->elements.size; index++) {
1481 const pm_node_t *element = cast->elements.nodes[index];
1482 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1483
1484 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1485 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1486 }
1487
1488 return true;
1489 }
1490 case PM_FALSE_NODE:
1491 case PM_FLOAT_NODE:
1492 case PM_IMAGINARY_NODE:
1493 case PM_INTEGER_NODE:
1494 case PM_NIL_NODE:
1495 case PM_RATIONAL_NODE:
1496 case PM_REGULAR_EXPRESSION_NODE:
1497 case PM_SOURCE_ENCODING_NODE:
1498 case PM_SOURCE_FILE_NODE:
1499 case PM_SOURCE_LINE_NODE:
1500 case PM_STRING_NODE:
1501 case PM_SYMBOL_NODE:
1502 case PM_TRUE_NODE:
1503 return true;
1504 default:
1505 return false;
1506 }
1507}
1508
1513static inline void
1514pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1515 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1516 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1517 }
1518}
1519
1532static void
1533pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1534 switch (PM_NODE_TYPE(node)) {
1535 case PM_AND_NODE: {
1536 pm_and_node_t *cast = (pm_and_node_t *) node;
1537 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1538 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1539 break;
1540 }
1541 case PM_OR_NODE: {
1542 pm_or_node_t *cast = (pm_or_node_t *) node;
1543 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1544 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1545 break;
1546 }
1547 case PM_PARENTHESES_NODE: {
1549
1550 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1551 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1552 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1553 }
1554
1555 break;
1556 }
1557 case PM_BEGIN_NODE: {
1558 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1559 if (cast->statements != NULL) {
1560 pm_statements_node_t *statements = cast->statements;
1561 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1562 }
1563 break;
1564 }
1565 case PM_RANGE_NODE: {
1566 pm_range_node_t *cast = (pm_range_node_t *) node;
1567
1568 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1569 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1570
1571 // Here we change the range node into a flip flop node. We can do
1572 // this since the nodes are exactly the same except for the type.
1573 // We're only asserting against the size when we should probably
1574 // assert against the entire layout, but we'll assume tests will
1575 // catch this.
1576 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1577 node->type = PM_FLIP_FLOP_NODE;
1578
1579 break;
1580 }
1581 case PM_REGULAR_EXPRESSION_NODE:
1582 // Here we change the regular expression node into a match last line
1583 // node. We can do this since the nodes are exactly the same except
1584 // for the type.
1586 node->type = PM_MATCH_LAST_LINE_NODE;
1587
1588 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1589 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1590 }
1591
1592 break;
1593 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1594 // Here we change the interpolated regular expression node into an
1595 // interpolated match last line node. We can do this since the nodes
1596 // are exactly the same except for the type.
1598 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1599
1600 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1601 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1602 }
1603
1604 break;
1605 case PM_INTEGER_NODE:
1606 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1607 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1608 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1609 }
1610 } else {
1611 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1612 }
1613 break;
1614 case PM_STRING_NODE:
1615 case PM_SOURCE_FILE_NODE:
1616 case PM_INTERPOLATED_STRING_NODE:
1617 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1618 break;
1619 case PM_SYMBOL_NODE:
1620 case PM_INTERPOLATED_SYMBOL_NODE:
1621 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1622 break;
1623 case PM_SOURCE_LINE_NODE:
1624 case PM_SOURCE_ENCODING_NODE:
1625 case PM_FLOAT_NODE:
1626 case PM_RATIONAL_NODE:
1627 case PM_IMAGINARY_NODE:
1628 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1629 break;
1630 case PM_CLASS_VARIABLE_WRITE_NODE:
1631 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1632 break;
1633 case PM_CONSTANT_WRITE_NODE:
1634 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1635 break;
1636 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1637 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1638 break;
1639 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1640 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1641 break;
1642 case PM_LOCAL_VARIABLE_WRITE_NODE:
1643 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1644 break;
1645 case PM_MULTI_WRITE_NODE:
1646 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1647 break;
1648 default:
1649 break;
1650 }
1651}
1652
1675
1679static inline const pm_location_t *
1680pm_arguments_end(pm_arguments_t *arguments) {
1681 if (arguments->block != NULL) {
1682 uint32_t end = PM_NODE_END(arguments->block);
1683
1684 if (arguments->closing_loc.length > 0) {
1685 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1686 if (arguments_end > end) {
1687 return &arguments->closing_loc;
1688 }
1689 }
1690 return &arguments->block->location;
1691 }
1692 if (arguments->closing_loc.length > 0) {
1693 return &arguments->closing_loc;
1694 }
1695 if (arguments->arguments != NULL) {
1696 return &arguments->arguments->base.location;
1697 }
1698 if (arguments->opening_loc.length > 0) {
1699 return &arguments->opening_loc;
1700 }
1701 return NULL;
1702}
1703
1708static void
1709pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1710 // First, check that we have arguments and that we don't have a closing
1711 // location for them.
1712 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1713 return;
1714 }
1715
1716 // Next, check that we don't have a single parentheses argument. This would
1717 // look like:
1718 //
1719 // foo (1) {}
1720 //
1721 // In this case, it's actually okay for the block to be attached to the
1722 // call, even though it looks like it's attached to the argument.
1723 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1724 return;
1725 }
1726
1727 // If we didn't hit a case before this check, then at this point we need to
1728 // add a syntax error.
1729 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1730}
1731
1732/******************************************************************************/
1733/* Basic character checks */
1734/******************************************************************************/
1735
1742static inline size_t
1743char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1744 if (n <= 0) return 0;
1745
1746 if (parser->encoding_changed) {
1747 size_t width;
1748
1749 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1750 return width;
1751 } else if (*b == '_') {
1752 return 1;
1753 } else if (*b >= 0x80) {
1754 return parser->encoding->char_width(b, n);
1755 } else {
1756 return 0;
1757 }
1758 } else if (*b < 0x80) {
1759 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1760 } else {
1761 return pm_encoding_utf_8_char_width(b, n);
1762 }
1763}
1764
1769static inline size_t
1770char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1771 if (n <= 0) {
1772 return 0;
1773 } else if (*b < 0x80) {
1774 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1775 } else {
1776 return pm_encoding_utf_8_char_width(b, n);
1777 }
1778}
1779
1785static inline size_t
1786char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1787 if (n <= 0) {
1788 return 0;
1789 } else if (parser->encoding_changed) {
1790 size_t width;
1791
1792 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1793 return width;
1794 } else if (*b == '_') {
1795 return 1;
1796 } else if (*b >= 0x80) {
1797 return parser->encoding->char_width(b, n);
1798 } else {
1799 return 0;
1800 }
1801 } else {
1802 return char_is_identifier_utf8(b, n);
1803 }
1804}
1805
// Here we're defining a perfect hash for the characters that are allowed in
// global names. This is used to quickly check the next character after a $ to
// see if it's a valid character for a global name.
//
// The table holds one 32-bit word per block of 32 code points starting at
// 0x20. BIT(c, idx) yields the bit for character c if c belongs to word idx,
// and 0 otherwise; PUNCT(idx) ORs together the bits of every allowed
// character for word idx.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('!', idx)  | BIT('\"', idx) | BIT('$', idx)  | BIT('&', idx) | \
    BIT('\'', idx) | BIT('*', idx)  | BIT('+', idx)  | BIT(',', idx) | \
    BIT('.', idx)  | BIT('/', idx)  | BIT('0', idx)  | BIT(':', idx) | \
    BIT(';', idx)  | BIT('<', idx)  | BIT('=', idx)  | BIT('>', idx) | \
    BIT('?', idx)  | BIT('@', idx)  | BIT('\\', idx) | BIT('`', idx) | \
    BIT('~', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT(0),
    PUNCT(1),
    PUNCT(2)
};

#undef BIT
#undef PUNCT
1822
1823static inline bool
1824char_is_global_name_punctuation(const uint8_t b) {
1825 const unsigned int i = (const unsigned int) b;
1826 if (i <= 0x20 || 0x7e < i) return false;
1827
1828 return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
1829}
1830
1831static inline bool
1832token_is_setter_name(pm_token_t *token) {
1833 return (
1834 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1835 ((token->type == PM_TOKEN_IDENTIFIER) &&
1836 (token->end - token->start >= 2) &&
1837 (token->end[-1] == '='))
1838 );
1839}
1840
/**
 * Return true if the `length` bytes starting at `source` spell one of the
 * keywords listed below. Candidates are narrowed first by length and then,
 * for lengths 2 through 6, by first byte.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
// Only used where `length` already equals the length of the literal.
#define IS(name) (memcmp(source, name, length) == 0)

    switch (length) {
        case 2:
            switch (source[0]) {
                case 'd': return IS("do");
                case 'i': return IS("if") || IS("in");
                case 'o': return IS("or");
                default: return false;
            }
        case 3:
            switch (source[0]) {
                case 'a': return IS("and");
                case 'd': return IS("def");
                case 'e': return IS("end");
                case 'f': return IS("for");
                case 'n': return IS("nil") || IS("not");
                default: return false;
            }
        case 4:
            switch (source[0]) {
                case 'c': return IS("case");
                case 'e': return IS("else");
                case 'n': return IS("next");
                case 'r': return IS("redo");
                case 's': return IS("self");
                case 't': return IS("then") || IS("true");
                case 'w': return IS("when");
                default: return false;
            }
        case 5:
            switch (source[0]) {
                case 'a': return IS("alias");
                case 'b': return IS("begin") || IS("break");
                case 'c': return IS("class");
                case 'e': return IS("elsif");
                case 'f': return IS("false");
                case 'r': return IS("retry");
                case 's': return IS("super");
                case 'u': return IS("undef") || IS("until");
                case 'w': return IS("while");
                case 'y': return IS("yield");
                default: return false;
            }
        case 6:
            switch (source[0]) {
                case 'e': return IS("ensure");
                case 'm': return IS("module");
                case 'r': return IS("rescue") || IS("return");
                case 'u': return IS("unless");
                default: return false;
            }
        case 8:
            return IS("__LINE__") || IS("__FILE__");
        case 12:
            return IS("__ENCODING__");
        default:
            return false;
    }

#undef IS
}
1911
1912/******************************************************************************/
1913/* Node flag handling functions */
1914/******************************************************************************/
1915
1919static inline void
1920pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1921 node->flags |= flag;
1922}
1923
1927static inline void
1928pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1929 node->flags &= (pm_node_flags_t) ~flag;
1930}
1931
1935static inline void
1936pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1937 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1938 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1939 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1940 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1941 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1942 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1943 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1944 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1945
1946 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1947}
1948
1949/******************************************************************************/
1950/* Node creation functions */
1951/******************************************************************************/
1952
1958#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1959
1963static inline pm_node_flags_t
1964pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1965 pm_node_flags_t flags = 0;
1966
1967 if (closing->type == PM_TOKEN_REGEXP_END) {
1968 pm_buffer_t unknown_flags = { 0 };
1969
1970 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1971 switch (*flag) {
1972 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1973 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1974 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1975 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1976
1977 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1978 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1979 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1980 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1981
1982 default: pm_buffer_append_byte(&unknown_flags, *flag);
1983 }
1984 }
1985
1986 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1987 if (unknown_flags_length != 0) {
1988 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1989 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1990 }
1991 pm_buffer_free(&unknown_flags);
1992 }
1993
1994 return flags;
1995}
1996
1997#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1998
1999static pm_statements_node_t *
2000pm_statements_node_create(pm_parser_t *parser);
2001
2002static void
2003pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2004
2005static size_t
2006pm_statements_node_body_length(pm_statements_node_t *node);
2007
2012static inline void
2013pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2014 if (integer->values != NULL) {
2015 size_t byte_size = integer->length * sizeof(uint32_t);
2016 uint32_t *old_values = integer->values;
2017 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
2018 xfree(old_values);
2019 }
2020}
2021
2025static pm_missing_node_t *
2026pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2027 return pm_missing_node_new(
2028 parser->arena,
2029 ++parser->node_id,
2030 0,
2031 ((pm_location_t) { .start = start, .length = length })
2032 );
2033}
2034
2038static pm_alias_global_variable_node_t *
2039pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2040 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2041
2042 return pm_alias_global_variable_node_new(
2043 parser->arena,
2044 ++parser->node_id,
2045 0,
2046 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2047 new_name,
2048 old_name,
2049 TOK2LOC(parser, keyword)
2050 );
2051}
2052
2056static pm_alias_method_node_t *
2057pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2058 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2059
2060 return pm_alias_method_node_new(
2061 parser->arena,
2062 ++parser->node_id,
2063 0,
2064 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2065 new_name,
2066 old_name,
2067 TOK2LOC(parser, keyword)
2068 );
2069}
2070
2074static pm_alternation_pattern_node_t *
2075pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2076 return pm_alternation_pattern_node_new(
2077 parser->arena,
2078 ++parser->node_id,
2079 0,
2080 PM_LOCATION_INIT_NODES(left, right),
2081 left,
2082 right,
2083 TOK2LOC(parser, operator)
2084 );
2085}
2086
2090static pm_and_node_t *
2091pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2092 pm_assert_value_expression(parser, left);
2093
2094 return pm_and_node_new(
2095 parser->arena,
2096 ++parser->node_id,
2097 0,
2098 PM_LOCATION_INIT_NODES(left, right),
2099 left,
2100 right,
2101 TOK2LOC(parser, operator)
2102 );
2103}
2104
2108static pm_arguments_node_t *
2109pm_arguments_node_create(pm_parser_t *parser) {
2110 return pm_arguments_node_new(
2111 parser->arena,
2112 ++parser->node_id,
2113 0,
2114 PM_LOCATION_INIT_UNSET,
2115 ((pm_node_list_t) { 0 })
2116 );
2117}
2118
2122static size_t
2123pm_arguments_node_size(pm_arguments_node_t *node) {
2124 return node->arguments.size;
2125}
2126
2130static void
2131pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
2132 if (pm_arguments_node_size(node) == 0) {
2133 PM_NODE_START_SET_NODE(node, argument);
2134 }
2135
2136 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2137 PM_NODE_LENGTH_SET_NODE(node, argument);
2138 }
2139
2140 pm_node_list_append(arena, &node->arguments, argument);
2141
2142 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2143 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2144 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2145 } else {
2146 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2147 }
2148 }
2149}
2150
2154static pm_array_node_t *
2155pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2156 if (opening == NULL) {
2157 return pm_array_node_new(
2158 parser->arena,
2159 ++parser->node_id,
2160 PM_NODE_FLAG_STATIC_LITERAL,
2161 PM_LOCATION_INIT_UNSET,
2162 ((pm_node_list_t) { 0 }),
2163 ((pm_location_t) { 0 }),
2164 ((pm_location_t) { 0 })
2165 );
2166 } else {
2167 return pm_array_node_new(
2168 parser->arena,
2169 ++parser->node_id,
2170 PM_NODE_FLAG_STATIC_LITERAL,
2171 PM_LOCATION_INIT_TOKEN(parser, opening),
2172 ((pm_node_list_t) { 0 }),
2173 TOK2LOC(parser, opening),
2174 TOK2LOC(parser, opening)
2175 );
2176 }
2177}
2178
2182static inline void
2183pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
2184 if (!node->elements.size && !node->opening_loc.length) {
2185 PM_NODE_START_SET_NODE(node, element);
2186 }
2187
2188 pm_node_list_append(arena, &node->elements, element);
2189 PM_NODE_LENGTH_SET_NODE(node, element);
2190
2191 // If the element is not a static literal, then the array is not a static
2192 // literal. Turn that flag off.
2193 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2194 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2195 }
2196
2197 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2198 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2199 }
2200}
2201
2205static void
2206pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2207 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2208 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2209 node->closing_loc = TOK2LOC(parser, closing);
2210}
2211
2216static pm_array_pattern_node_t *
2217pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2218 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2219 parser->arena,
2220 ++parser->node_id,
2221 0,
2222 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2223 NULL,
2224 ((pm_node_list_t) { 0 }),
2225 NULL,
2226 ((pm_node_list_t) { 0 }),
2227 ((pm_location_t) { 0 }),
2228 ((pm_location_t) { 0 })
2229 );
2230
2231 // For now we're going to just copy over each pointer manually. This could be
2232 // much more efficient, as we could instead resize the node list.
2233 bool found_rest = false;
2234 pm_node_t *child;
2235
2236 PM_NODE_LIST_FOREACH(nodes, index, child) {
2237 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2238 node->rest = child;
2239 found_rest = true;
2240 } else if (found_rest) {
2241 pm_node_list_append(parser->arena, &node->posts, child);
2242 } else {
2243 pm_node_list_append(parser->arena, &node->requireds, child);
2244 }
2245 }
2246
2247 return node;
2248}
2249
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2255 return pm_array_pattern_node_new(
2256 parser->arena,
2257 ++parser->node_id,
2258 0,
2259 PM_LOCATION_INIT_NODE(rest),
2260 NULL,
2261 ((pm_node_list_t) { 0 }),
2262 rest,
2263 ((pm_node_list_t) { 0 }),
2264 ((pm_location_t) { 0 }),
2265 ((pm_location_t) { 0 })
2266 );
2267}
2268
2273static pm_array_pattern_node_t *
2274pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2275 return pm_array_pattern_node_new(
2276 parser->arena,
2277 ++parser->node_id,
2278 0,
2279 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2280 constant,
2281 ((pm_node_list_t) { 0 }),
2282 NULL,
2283 ((pm_node_list_t) { 0 }),
2284 TOK2LOC(parser, opening),
2285 TOK2LOC(parser, closing)
2286 );
2287}
2288
2293static pm_array_pattern_node_t *
2294pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2295 return pm_array_pattern_node_new(
2296 parser->arena,
2297 ++parser->node_id,
2298 0,
2299 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2300 NULL,
2301 ((pm_node_list_t) { 0 }),
2302 NULL,
2303 ((pm_node_list_t) { 0 }),
2304 TOK2LOC(parser, opening),
2305 TOK2LOC(parser, closing)
2306 );
2307}
2308
2309static inline void
2310pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2311 pm_node_list_append(arena, &node->requireds, inner);
2312}
2313
2317static pm_assoc_node_t *
2318pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2319 uint32_t end;
2320
2321 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2322 end = PM_NODE_END(value);
2323 } else if (operator != NULL) {
2324 end = PM_TOKEN_END(parser, operator);
2325 } else {
2326 end = PM_NODE_END(key);
2327 }
2328
2329 // Hash string keys will be frozen, so we can mark them as frozen here so
2330 // that the compiler picks them up and also when we check for static literal
2331 // on the keys it gets factored in.
2332 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2333 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2334 }
2335
2336 // If the key and value of this assoc node are both static literals, then
2337 // we can mark this node as a static literal.
2338 pm_node_flags_t flags = 0;
2339 if (
2340 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2341 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2342 ) {
2343 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2344 }
2345
2346 return pm_assoc_node_new(
2347 parser->arena,
2348 ++parser->node_id,
2349 flags,
2350 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2351 key,
2352 value,
2353 NTOK2LOC(parser, operator)
2354 );
2355}
2356
2360static pm_assoc_splat_node_t *
2361pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2362 assert(operator->type == PM_TOKEN_USTAR_STAR);
2363
2364 return pm_assoc_splat_node_new(
2365 parser->arena,
2366 ++parser->node_id,
2367 0,
2368 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2369 value,
2370 TOK2LOC(parser, operator)
2371 );
2372}
2373
2377static pm_back_reference_read_node_t *
2378pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2379 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2380
2381 return pm_back_reference_read_node_new(
2382 parser->arena,
2383 ++parser->node_id,
2384 0,
2385 PM_LOCATION_INIT_TOKEN(parser, name),
2386 pm_parser_constant_id_token(parser, name)
2387 );
2388}
2389
2393static pm_begin_node_t *
2394pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2395 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2396 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2397
2398 return pm_begin_node_new(
2399 parser->arena,
2400 ++parser->node_id,
2401 0,
2402 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2403 NTOK2LOC(parser, begin_keyword),
2404 statements,
2405 NULL,
2406 NULL,
2407 NULL,
2408 ((pm_location_t) { 0 })
2409 );
2410}
2411
2415static void
2416pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2417 if (node->begin_keyword_loc.length == 0) {
2418 PM_NODE_START_SET_NODE(node, rescue_clause);
2419 }
2420 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2421 node->rescue_clause = rescue_clause;
2422}
2423
2427static void
2428pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2429 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, else_clause);
2431 }
2432 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2433 node->else_clause = else_clause;
2434}
2435
2439static void
2440pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2441 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2442 PM_NODE_START_SET_NODE(node, ensure_clause);
2443 }
2444 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2445 node->ensure_clause = ensure_clause;
2446}
2447
2451static void
2452pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2453 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2454 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2455 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2456}
2457
2461static pm_block_argument_node_t *
2462pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2463 assert(operator->type == PM_TOKEN_UAMPERSAND);
2464
2465 return pm_block_argument_node_new(
2466 parser->arena,
2467 ++parser->node_id,
2468 0,
2469 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2470 expression,
2471 TOK2LOC(parser, operator)
2472 );
2473}
2474
2478static pm_block_node_t *
2479pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2480 return pm_block_node_new(
2481 parser->arena,
2482 ++parser->node_id,
2483 0,
2484 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2485 *locals,
2486 parameters,
2487 body,
2488 TOK2LOC(parser, opening),
2489 TOK2LOC(parser, closing)
2490 );
2491}
2492
2496static pm_block_parameter_node_t *
2497pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2498 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2499
2500 return pm_block_parameter_node_new(
2501 parser->arena,
2502 ++parser->node_id,
2503 0,
2504 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
2505 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2506 NTOK2LOC(parser, name),
2507 TOK2LOC(parser, operator)
2508 );
2509}
2510
2514static pm_block_parameters_node_t *
2515pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2516 uint32_t start;
2517 if (opening != NULL) {
2518 start = PM_TOKEN_START(parser, opening);
2519 } else if (parameters != NULL) {
2520 start = PM_NODE_START(parameters);
2521 } else {
2522 start = 0;
2523 }
2524
2525 uint32_t end;
2526 if (parameters != NULL) {
2527 end = PM_NODE_END(parameters);
2528 } else if (opening != NULL) {
2529 end = PM_TOKEN_END(parser, opening);
2530 } else {
2531 end = 0;
2532 }
2533
2534 return pm_block_parameters_node_new(
2535 parser->arena,
2536 ++parser->node_id,
2537 0,
2538 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2539 parameters,
2540 ((pm_node_list_t) { 0 }),
2541 NTOK2LOC(parser, opening),
2542 ((pm_location_t) { 0 })
2543 );
2544}
2545
2549static void
2550pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2551 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2552 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2553 node->closing_loc = TOK2LOC(parser, closing);
2554}
2555
2559static pm_block_local_variable_node_t *
2560pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2561 return pm_block_local_variable_node_new(
2562 parser->arena,
2563 ++parser->node_id,
2564 0,
2565 PM_LOCATION_INIT_TOKEN(parser, name),
2566 pm_parser_constant_id_token(parser, name)
2567 );
2568}
2569
2573static void
2574pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2575 pm_node_list_append(arena, &node->locals, UP(local));
2576
2577 if (PM_NODE_LENGTH(node) == 0) {
2578 PM_NODE_START_SET_NODE(node, local);
2579 }
2580
2581 PM_NODE_LENGTH_SET_NODE(node, local);
2582}
2583
2587static pm_break_node_t *
2588pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2589 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2590
2591 return pm_break_node_new(
2592 parser->arena,
2593 ++parser->node_id,
2594 0,
2595 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2596 arguments,
2597 TOK2LOC(parser, keyword)
2598 );
2599}
2600
// Some flags are only useful while parsing and are not relevant to consumers
// of the syntax tree. They are therefore defined here rather than in
// config.yml or in a header file.
static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);

// Internal call node flags, each derived by shifting PM_CALL_NODE_FLAGS_LAST - 1
// left by a distinct amount.
// NOTE(review): presumably PM_CALL_NODE_FLAGS_LAST - 1 is the highest public
// call node flag, which would place these in the bits directly above it.
// Confirm against the generated flag definitions.
static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2609
2615static pm_call_node_t *
2616pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2617 return pm_call_node_new(
2618 parser->arena,
2619 ++parser->node_id,
2620 flags,
2621 PM_LOCATION_INIT_UNSET,
2622 NULL,
2623 ((pm_location_t) { 0 }),
2624 0,
2625 ((pm_location_t) { 0 }),
2626 ((pm_location_t) { 0 }),
2627 NULL,
2628 ((pm_location_t) { 0 }),
2629 ((pm_location_t) { 0 }),
2630 NULL
2631 );
2632}
2633
2638static inline pm_node_flags_t
2639pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2640 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2641}
2642
2647static pm_call_node_t *
2648pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2649 pm_assert_value_expression(parser, receiver);
2650
2651 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2652 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2653 flags |= PM_CALL_NODE_FLAGS_INDEX;
2654 }
2655
2656 pm_call_node_t *node = pm_call_node_create(parser, flags);
2657
2658 PM_NODE_START_SET_NODE(node, receiver);
2659
2660 const pm_location_t *end = pm_arguments_end(arguments);
2661 assert(end != NULL && "unreachable");
2662 PM_NODE_LENGTH_SET_LOCATION(node, end);
2663
2664 node->receiver = receiver;
2665 node->message_loc.start = arguments->opening_loc.start;
2666 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2667
2668 node->opening_loc = arguments->opening_loc;
2669 node->arguments = arguments->arguments;
2670 node->closing_loc = arguments->closing_loc;
2671 node->block = arguments->block;
2672
2673 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2674 return node;
2675}
2676
2680static pm_call_node_t *
2681pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2682 pm_assert_value_expression(parser, receiver);
2683 pm_assert_value_expression(parser, argument);
2684
2685 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2686
2687 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2688 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2689
2690 node->receiver = receiver;
2691 node->message_loc = TOK2LOC(parser, operator);
2692
2693 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2694 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2695 node->arguments = arguments;
2696
2697 node->name = pm_parser_constant_id_token(parser, operator);
2698 return node;
2699}
2700
2701static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2702
2706static pm_call_node_t *
2707pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2708 pm_assert_value_expression(parser, receiver);
2709
2710 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2711
2712 PM_NODE_START_SET_NODE(node, receiver);
2713 const pm_location_t *end = pm_arguments_end(arguments);
2714 if (end == NULL) {
2715 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
2716 } else {
2717 PM_NODE_LENGTH_SET_LOCATION(node, end);
2718 }
2719
2720 node->receiver = receiver;
2721 node->call_operator_loc = TOK2LOC(parser, operator);
2722 node->message_loc = TOK2LOC(parser, message);
2723 node->opening_loc = arguments->opening_loc;
2724 node->arguments = arguments->arguments;
2725 node->closing_loc = arguments->closing_loc;
2726 node->block = arguments->block;
2727
2728 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2729 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2730 }
2731
2736 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
2737 return node;
2738}
2739
2743static pm_call_node_t *
2744pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2745 pm_call_node_t *node = pm_call_node_create(parser, 0);
2746 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
2747
2748 node->receiver = receiver;
2749 node->arguments = arguments;
2750
2751 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2752 return node;
2753}
2754
2759static pm_call_node_t *
2760pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2761 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2762
2763 PM_NODE_START_SET_TOKEN(parser, node, message);
2764 const pm_location_t *end = pm_arguments_end(arguments);
2765 assert(end != NULL && "unreachable");
2766 PM_NODE_LENGTH_SET_LOCATION(node, end);
2767
2768 node->message_loc = TOK2LOC(parser, message);
2769 node->opening_loc = arguments->opening_loc;
2770 node->arguments = arguments->arguments;
2771 node->closing_loc = arguments->closing_loc;
2772 node->block = arguments->block;
2773
2774 node->name = pm_parser_constant_id_token(parser, message);
2775 return node;
2776}
2777
2782static pm_call_node_t *
2783pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2784 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2785
2786 node->base.location = (pm_location_t) { 0 };
2787 node->arguments = arguments;
2788
2789 node->name = name;
2790 return node;
2791}
2792
2796static pm_call_node_t *
2797pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2798 pm_assert_value_expression(parser, receiver);
2799 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2800
2801 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2802
2803 PM_NODE_START_SET_TOKEN(parser, node, message);
2804 if (arguments->closing_loc.length > 0) {
2805 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
2806 } else {
2807 assert(receiver != NULL);
2808 PM_NODE_LENGTH_SET_NODE(node, receiver);
2809 }
2810
2811 node->receiver = receiver;
2812 node->message_loc = TOK2LOC(parser, message);
2813 node->opening_loc = arguments->opening_loc;
2814 node->arguments = arguments->arguments;
2815 node->closing_loc = arguments->closing_loc;
2816
2817 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2818 return node;
2819}
2820
2824static pm_call_node_t *
2825pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2826 pm_assert_value_expression(parser, receiver);
2827
2828 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2829
2830 PM_NODE_START_SET_NODE(node, receiver);
2831 const pm_location_t *end = pm_arguments_end(arguments);
2832 assert(end != NULL && "unreachable");
2833 PM_NODE_LENGTH_SET_LOCATION(node, end);
2834
2835 node->receiver = receiver;
2836 node->call_operator_loc = TOK2LOC(parser, operator);
2837 node->opening_loc = arguments->opening_loc;
2838 node->arguments = arguments->arguments;
2839 node->closing_loc = arguments->closing_loc;
2840 node->block = arguments->block;
2841
2842 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2843 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2844 }
2845
2846 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2847 return node;
2848}
2849
2853static pm_call_node_t *
2854pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2855 pm_assert_value_expression(parser, receiver);
2856
2857 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2858
2859 PM_NODE_START_SET_TOKEN(parser, node, operator);
2860 PM_NODE_LENGTH_SET_NODE(node, receiver);
2861
2862 node->receiver = receiver;
2863 node->message_loc = TOK2LOC(parser, operator);
2864
2865 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2866 return node;
2867}
2868
2873static pm_call_node_t *
2874pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2875 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2876
2877 node->base.location = TOK2LOC(parser, message);
2878 node->message_loc = TOK2LOC(parser, message);
2879
2880 node->name = pm_parser_constant_id_token(parser, message);
2881 return node;
2882}
2883
2888static inline bool
2889pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2890 return (
2891 (node->message_loc.length > 0) &&
2892 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
2893 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
2894 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
2895 (node->opening_loc.length == 0) &&
2896 (node->arguments == NULL) &&
2897 (node->block == NULL)
2898 );
2899}
2900
2904static void
2905pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2906 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2907
2908 if (write_constant->length > 0) {
2909 size_t length = write_constant->length - 1;
2910
2911 void *memory = xmalloc(length);
2912 memcpy(memory, write_constant->start, length);
2913
2914 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2915 } else {
2916 // We can get here if the message was missing because of a syntax error.
2917 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2918 }
2919}
2920
2924static pm_call_and_write_node_t *
2925pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2926 assert(target->block == NULL);
2927 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2928
2929 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
2930 parser->arena,
2931 ++parser->node_id,
2932 FL(target),
2933 PM_LOCATION_INIT_NODES(target, value),
2934 target->receiver,
2935 target->call_operator_loc,
2936 target->message_loc,
2937 0,
2938 target->name,
2939 TOK2LOC(parser, operator),
2940 value
2941 );
2942
2943 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2944
2945 // The target is no longer necessary because we've reused its children.
2946 // It is arena-allocated so no explicit free is needed.
2947
2948 return node;
2949}
2950
2955static void
2956pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2957 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2958 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2959 pm_node_t *node;
2960 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2961 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2962 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2963 break;
2964 }
2965 }
2966 }
2967
2968 if (block != NULL) {
2969 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2970 }
2971 }
2972}
2973
2977static pm_index_and_write_node_t *
2978pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2979 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2980
2981 pm_index_arguments_check(parser, target->arguments, target->block);
2982
2983 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2984
2985 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
2986 parser->arena,
2987 ++parser->node_id,
2988 FL(target),
2989 PM_LOCATION_INIT_NODES(target, value),
2990 target->receiver,
2991 target->call_operator_loc,
2992 target->opening_loc,
2993 target->arguments,
2994 target->closing_loc,
2995 (pm_block_argument_node_t *) target->block,
2996 TOK2LOC(parser, operator),
2997 value
2998 );
2999
3000 // The target is no longer necessary because we've reused its children.
3001 // It is arena-allocated so no explicit free is needed.
3002
3003 return node;
3004}
3005
3009static pm_call_operator_write_node_t *
3010pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3012
3013 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3014 parser->arena,
3015 ++parser->node_id,
3016 FL(target),
3017 PM_LOCATION_INIT_NODES(target, value),
3018 target->receiver,
3019 target->call_operator_loc,
3020 target->message_loc,
3021 0,
3022 target->name,
3023 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3024 TOK2LOC(parser, operator),
3025 value
3026 );
3027
3028 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3029
3030 // The target is no longer necessary because we've reused its children.
3031 // It is arena-allocated so no explicit free is needed.
3032
3033 return node;
3034}
3035
3039static pm_index_operator_write_node_t *
3040pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3041 pm_index_arguments_check(parser, target->arguments, target->block);
3042
3043 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3044
3045 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3046 parser->arena,
3047 ++parser->node_id,
3048 FL(target),
3049 PM_LOCATION_INIT_NODES(target, value),
3050 target->receiver,
3051 target->call_operator_loc,
3052 target->opening_loc,
3053 target->arguments,
3054 target->closing_loc,
3055 (pm_block_argument_node_t *) target->block,
3056 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3057 TOK2LOC(parser, operator),
3058 value
3059 );
3060
3061 // The target is no longer necessary because we've reused its children.
3062 // It is arena-allocated so no explicit free is needed.
3063
3064 return node;
3065}
3066
3070static pm_call_or_write_node_t *
3071pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3072 assert(target->block == NULL);
3073 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3074
3075 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3076 parser->arena,
3077 ++parser->node_id,
3078 FL(target),
3079 PM_LOCATION_INIT_NODES(target, value),
3080 target->receiver,
3081 target->call_operator_loc,
3082 target->message_loc,
3083 0,
3084 target->name,
3085 TOK2LOC(parser, operator),
3086 value
3087 );
3088
3089 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3090
3091 // The target is no longer necessary because we've reused its children.
3092 // It is arena-allocated so no explicit free is needed.
3093
3094 return node;
3095}
3096
3100static pm_index_or_write_node_t *
3101pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3102 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3103
3104 pm_index_arguments_check(parser, target->arguments, target->block);
3105
3106 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3107
3108 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3109 parser->arena,
3110 ++parser->node_id,
3111 FL(target),
3112 PM_LOCATION_INIT_NODES(target, value),
3113 target->receiver,
3114 target->call_operator_loc,
3115 target->opening_loc,
3116 target->arguments,
3117 target->closing_loc,
3118 (pm_block_argument_node_t *) target->block,
3119 TOK2LOC(parser, operator),
3120 value
3121 );
3122
3123 // The target is no longer necessary because we've reused its children.
3124 // It is arena-allocated so no explicit free is needed.
3125
3126 return node;
3127}
3128
3133static pm_call_target_node_t *
3134pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3135 pm_call_target_node_t *node = pm_call_target_node_new(
3136 parser->arena,
3137 ++parser->node_id,
3138 FL(target),
3139 PM_LOCATION_INIT_NODE(target),
3140 target->receiver,
3141 target->call_operator_loc,
3142 target->name,
3143 target->message_loc
3144 );
3145
3146 /* It is possible to get here where we have parsed an invalid syntax tree
3147 * where the call operator was not present. In that case we will have a
3148 * problem because it is a required location. In this case we need to fill
3149 * it in with a fake location so that the syntax tree remains valid. */
3150 if (node->call_operator_loc.length == 0) {
3151 node->call_operator_loc = target->base.location;
3152 }
3153
3154 // The target is no longer necessary because we've reused its children.
3155 // It is arena-allocated so no explicit free is needed.
3156
3157 return node;
3158}
3159
3164static pm_index_target_node_t *
3165pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3166 pm_index_arguments_check(parser, target->arguments, target->block);
3167 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3168
3169 pm_index_target_node_t *node = pm_index_target_node_new(
3170 parser->arena,
3171 ++parser->node_id,
3172 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3173 PM_LOCATION_INIT_NODE(target),
3174 target->receiver,
3175 target->opening_loc,
3176 target->arguments,
3177 target->closing_loc,
3178 (pm_block_argument_node_t *) target->block
3179 );
3180
3181 // The target is no longer necessary because we've reused its children.
3182 // It is arena-allocated so no explicit free is needed.
3183
3184 return node;
3185}
3186
3190static pm_capture_pattern_node_t *
3191pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3192 return pm_capture_pattern_node_new(
3193 parser->arena,
3194 ++parser->node_id,
3195 0,
3196 PM_LOCATION_INIT_NODES(value, target),
3197 value,
3198 target,
3199 TOK2LOC(parser, operator)
3200 );
3201}
3202
3206static pm_case_node_t *
3207pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3208 return pm_case_node_new(
3209 parser->arena,
3210 ++parser->node_id,
3211 0,
3212 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3213 predicate,
3214 ((pm_node_list_t) { 0 }),
3215 NULL,
3216 TOK2LOC(parser, case_keyword),
3217 NTOK2LOC(parser, end_keyword)
3218 );
3219}
3220
3224static void
3225pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3226 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3227
3228 pm_node_list_append(arena, &node->conditions, condition);
3229 PM_NODE_LENGTH_SET_NODE(node, condition);
3230}
3231
3235static void
3236pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3237 node->else_clause = else_clause;
3238 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3239}
3240
3244static void
3245pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3246 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3247 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3248}
3249
3253static pm_case_match_node_t *
3254pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3255 return pm_case_match_node_new(
3256 parser->arena,
3257 ++parser->node_id,
3258 0,
3259 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3260 predicate,
3261 ((pm_node_list_t) { 0 }),
3262 NULL,
3263 TOK2LOC(parser, case_keyword),
3264 ((pm_location_t) { 0 })
3265 );
3266}
3267
3271static void
3272pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3273 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3274
3275 pm_node_list_append(arena, &node->conditions, condition);
3276 PM_NODE_LENGTH_SET_NODE(node, condition);
3277}
3278
3282static void
3283pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3284 node->else_clause = else_clause;
3285 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3286}
3287
3291static void
3292pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3293 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3294 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3295}
3296
3300static pm_class_node_t *
3301pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3302 return pm_class_node_new(
3303 parser->arena,
3304 ++parser->node_id,
3305 0,
3306 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3307 *locals,
3308 TOK2LOC(parser, class_keyword),
3309 constant_path,
3310 NTOK2LOC(parser, inheritance_operator),
3311 superclass,
3312 body,
3313 TOK2LOC(parser, end_keyword),
3314 pm_parser_constant_id_token(parser, name)
3315 );
3316}
3317
3321static pm_class_variable_and_write_node_t *
3322pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3323 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3324
3325 return pm_class_variable_and_write_node_new(
3326 parser->arena,
3327 ++parser->node_id,
3328 0,
3329 PM_LOCATION_INIT_NODES(target, value),
3330 target->name,
3331 target->base.location,
3332 TOK2LOC(parser, operator),
3333 value
3334 );
3335}
3336
3340static pm_class_variable_operator_write_node_t *
3341pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3342 return pm_class_variable_operator_write_node_new(
3343 parser->arena,
3344 ++parser->node_id,
3345 0,
3346 PM_LOCATION_INIT_NODES(target, value),
3347 target->name,
3348 target->base.location,
3349 TOK2LOC(parser, operator),
3350 value,
3351 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3352 );
3353}
3354
3358static pm_class_variable_or_write_node_t *
3359pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3360 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3361
3362 return pm_class_variable_or_write_node_new(
3363 parser->arena,
3364 ++parser->node_id,
3365 0,
3366 PM_LOCATION_INIT_NODES(target, value),
3367 target->name,
3368 target->base.location,
3369 TOK2LOC(parser, operator),
3370 value
3371 );
3372}
3373
3377static pm_class_variable_read_node_t *
3378pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3379 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3380
3381 return pm_class_variable_read_node_new(
3382 parser->arena,
3383 ++parser->node_id,
3384 0,
3385 PM_LOCATION_INIT_TOKEN(parser, token),
3386 pm_parser_constant_id_token(parser, token)
3387 );
3388}
3389
3396static inline pm_node_flags_t
3397pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3398 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3399 return flags;
3400 }
3401 return 0;
3402}
3403
3407static pm_class_variable_write_node_t *
3408pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3409 return pm_class_variable_write_node_new(
3410 parser->arena,
3411 ++parser->node_id,
3412 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3413 PM_LOCATION_INIT_NODES(read_node, value),
3414 read_node->name,
3415 read_node->base.location,
3416 value,
3417 TOK2LOC(parser, operator)
3418 );
3419}
3420
3424static pm_constant_path_and_write_node_t *
3425pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3426 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3427
3428 return pm_constant_path_and_write_node_new(
3429 parser->arena,
3430 ++parser->node_id,
3431 0,
3432 PM_LOCATION_INIT_NODES(target, value),
3433 target,
3434 TOK2LOC(parser, operator),
3435 value
3436 );
3437}
3438
3442static pm_constant_path_operator_write_node_t *
3443pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3444 return pm_constant_path_operator_write_node_new(
3445 parser->arena,
3446 ++parser->node_id,
3447 0,
3448 PM_LOCATION_INIT_NODES(target, value),
3449 target,
3450 TOK2LOC(parser, operator),
3451 value,
3452 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3453 );
3454}
3455
3459static pm_constant_path_or_write_node_t *
3460pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3461 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3462
3463 return pm_constant_path_or_write_node_new(
3464 parser->arena,
3465 ++parser->node_id,
3466 0,
3467 PM_LOCATION_INIT_NODES(target, value),
3468 target,
3469 TOK2LOC(parser, operator),
3470 value
3471 );
3472}
3473
3477static pm_constant_path_node_t *
3478pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3479 pm_assert_value_expression(parser, parent);
3480
3481 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3482 if (name_token->type == PM_TOKEN_CONSTANT) {
3483 name = pm_parser_constant_id_token(parser, name_token);
3484 }
3485
3486 return pm_constant_path_node_new(
3487 parser->arena,
3488 ++parser->node_id,
3489 0,
3490 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3491 parent,
3492 name,
3493 TOK2LOC(parser, delimiter),
3494 TOK2LOC(parser, name_token)
3495 );
3496}
3497
3501static pm_constant_path_write_node_t *
3502pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3503 return pm_constant_path_write_node_new(
3504 parser->arena,
3505 ++parser->node_id,
3506 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3507 PM_LOCATION_INIT_NODES(target, value),
3508 target,
3509 TOK2LOC(parser, operator),
3510 value
3511 );
3512}
3513
3517static pm_constant_and_write_node_t *
3518pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3519 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3520
3521 return pm_constant_and_write_node_new(
3522 parser->arena,
3523 ++parser->node_id,
3524 0,
3525 PM_LOCATION_INIT_NODES(target, value),
3526 target->name,
3527 target->base.location,
3528 TOK2LOC(parser, operator),
3529 value
3530 );
3531}
3532
3536static pm_constant_operator_write_node_t *
3537pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3538 return pm_constant_operator_write_node_new(
3539 parser->arena,
3540 ++parser->node_id,
3541 0,
3542 PM_LOCATION_INIT_NODES(target, value),
3543 target->name,
3544 target->base.location,
3545 TOK2LOC(parser, operator),
3546 value,
3547 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3548 );
3549}
3550
3554static pm_constant_or_write_node_t *
3555pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3556 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3557
3558 return pm_constant_or_write_node_new(
3559 parser->arena,
3560 ++parser->node_id,
3561 0,
3562 PM_LOCATION_INIT_NODES(target, value),
3563 target->name,
3564 target->base.location,
3565 TOK2LOC(parser, operator),
3566 value
3567 );
3568}
3569
3573static pm_constant_read_node_t *
3574pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3575 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3576
3577 return pm_constant_read_node_new(
3578 parser->arena,
3579 ++parser->node_id,
3580 0,
3581 PM_LOCATION_INIT_TOKEN(parser, name),
3582 pm_parser_constant_id_token(parser, name)
3583 );
3584}
3585
3589static pm_constant_write_node_t *
3590pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3591 return pm_constant_write_node_new(
3592 parser->arena,
3593 ++parser->node_id,
3594 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3595 PM_LOCATION_INIT_NODES(target, value),
3596 target->name,
3597 target->base.location,
3598 value,
3599 TOK2LOC(parser, operator)
3600 );
3601}
3602
3606static void
3607pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3608 switch (PM_NODE_TYPE(node)) {
3609 case PM_BEGIN_NODE: {
3610 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3611 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3612 break;
3613 }
3614 case PM_PARENTHESES_NODE: {
3615 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3616 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3617 break;
3618 }
3619 case PM_STATEMENTS_NODE: {
3620 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3621 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3622 break;
3623 }
3624 case PM_ARRAY_NODE:
3625 case PM_FLOAT_NODE:
3626 case PM_IMAGINARY_NODE:
3627 case PM_INTEGER_NODE:
3628 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3629 case PM_INTERPOLATED_STRING_NODE:
3630 case PM_INTERPOLATED_SYMBOL_NODE:
3631 case PM_INTERPOLATED_X_STRING_NODE:
3632 case PM_RATIONAL_NODE:
3633 case PM_REGULAR_EXPRESSION_NODE:
3634 case PM_SOURCE_ENCODING_NODE:
3635 case PM_SOURCE_FILE_NODE:
3636 case PM_SOURCE_LINE_NODE:
3637 case PM_STRING_NODE:
3638 case PM_SYMBOL_NODE:
3639 case PM_X_STRING_NODE:
3640 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3641 break;
3642 default:
3643 break;
3644 }
3645}
3646
3650static pm_def_node_t *
3651pm_def_node_create(
3652 pm_parser_t *parser,
3653 pm_constant_id_t name,
3654 const pm_token_t *name_loc,
3655 pm_node_t *receiver,
3656 pm_parameters_node_t *parameters,
3657 pm_node_t *body,
3658 pm_constant_id_list_t *locals,
3659 const pm_token_t *def_keyword,
3660 const pm_token_t *operator,
3661 const pm_token_t *lparen,
3662 const pm_token_t *rparen,
3663 const pm_token_t *equal,
3664 const pm_token_t *end_keyword
3665) {
3666 if (receiver != NULL) {
3667 pm_def_node_receiver_check(parser, receiver);
3668 }
3669
3670 return pm_def_node_new(
3671 parser->arena,
3672 ++parser->node_id,
3673 0,
3674 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3675 name,
3676 TOK2LOC(parser, name_loc),
3677 receiver,
3678 parameters,
3679 body,
3680 *locals,
3681 TOK2LOC(parser, def_keyword),
3682 NTOK2LOC(parser, operator),
3683 NTOK2LOC(parser, lparen),
3684 NTOK2LOC(parser, rparen),
3685 NTOK2LOC(parser, equal),
3686 NTOK2LOC(parser, end_keyword)
3687 );
3688}
3689
3693static pm_defined_node_t *
3694pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3695 return pm_defined_node_new(
3696 parser->arena,
3697 ++parser->node_id,
3698 0,
3699 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3700 NTOK2LOC(parser, lparen),
3701 value,
3702 NTOK2LOC(parser, rparen),
3703 TOK2LOC(parser, keyword)
3704 );
3705}
3706
3710static pm_else_node_t *
3711pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3712 return pm_else_node_new(
3713 parser->arena,
3714 ++parser->node_id,
3715 0,
3716 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
3717 TOK2LOC(parser, else_keyword),
3718 statements,
3719 NTOK2LOC(parser, end_keyword)
3720 );
3721}
3722
3726static pm_embedded_statements_node_t *
3727pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3728 return pm_embedded_statements_node_new(
3729 parser->arena,
3730 ++parser->node_id,
3731 0,
3732 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
3733 TOK2LOC(parser, opening),
3734 statements,
3735 TOK2LOC(parser, closing)
3736 );
3737}
3738
3742static pm_embedded_variable_node_t *
3743pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3744 return pm_embedded_variable_node_new(
3745 parser->arena,
3746 ++parser->node_id,
3747 0,
3748 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
3749 TOK2LOC(parser, operator),
3750 variable
3751 );
3752}
3753
3757static pm_ensure_node_t *
3758pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3759 return pm_ensure_node_new(
3760 parser->arena,
3761 ++parser->node_id,
3762 0,
3763 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
3764 TOK2LOC(parser, ensure_keyword),
3765 statements,
3766 TOK2LOC(parser, end_keyword)
3767 );
3768}
3769
3773static pm_false_node_t *
3774pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3775 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3776
3777 return pm_false_node_new(
3778 parser->arena,
3779 ++parser->node_id,
3780 PM_NODE_FLAG_STATIC_LITERAL,
3781 PM_LOCATION_INIT_TOKEN(parser, token)
3782 );
3783}
3784
3789static pm_find_pattern_node_t *
3790pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3791 pm_node_t *left = nodes->nodes[0];
3792 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3793 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3794
3795 pm_node_t *right;
3796
3797 if (nodes->size == 1) {
3798 right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0));
3799 } else {
3800 right = nodes->nodes[nodes->size - 1];
3801 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3802 }
3803
3804#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3805 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3806 // The resulting AST will anyway be ignored, but this file still needs to compile.
3807 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3808#else
3809 pm_node_t *right_splat_node = right;
3810#endif
3811
3812 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
3813 parser->arena,
3814 ++parser->node_id,
3815 0,
3816 PM_LOCATION_INIT_NODES(left, right),
3817 NULL,
3818 left_splat_node,
3819 ((pm_node_list_t) { 0 }),
3820 right_splat_node,
3821 ((pm_location_t) { 0 }),
3822 ((pm_location_t) { 0 })
3823 );
3824
3825 // For now we're going to just copy over each pointer manually. This could be
3826 // much more efficient, as we could instead resize the node list to only point
3827 // to 1...-1.
3828 for (size_t index = 1; index < nodes->size - 1; index++) {
3829 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
3830 }
3831
3832 return node;
3833}
3834
3839static double
3840pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3841 ptrdiff_t diff = token->end - token->start;
3842 if (diff <= 0) return 0.0;
3843
3844 // First, get a buffer of the content.
3845 size_t length = (size_t) diff;
3846 const size_t buffer_size = sizeof(char) * (length + 1);
3847 char *buffer = xmalloc(buffer_size);
3848 memcpy((void *) buffer, token->start, length);
3849
3850 // Next, determine if we need to replace the decimal point because of
3851 // locale-specific options, and then normalize them if we have to.
3852 char decimal_point = *localeconv()->decimal_point;
3853 if (decimal_point != '.') {
3854 for (size_t index = 0; index < length; index++) {
3855 if (buffer[index] == '.') buffer[index] = decimal_point;
3856 }
3857 }
3858
3859 // Next, handle underscores by removing them from the buffer.
3860 for (size_t index = 0; index < length; index++) {
3861 if (buffer[index] == '_') {
3862 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3863 length--;
3864 }
3865 }
3866
3867 // Null-terminate the buffer so that strtod cannot read off the end.
3868 buffer[length] = '\0';
3869
3870 // Now, call strtod to parse the value. Note that CRuby has their own
3871 // version of strtod which avoids locales. We're okay using the locale-aware
3872 // version because we've already validated through the parser that the token
3873 // is in a valid format.
3874 errno = 0;
3875 char *eptr;
3876 double value = strtod(buffer, &eptr);
3877
3878 // This should never happen, because we've already checked that the token
3879 // is in a valid format. However it's good to be safe.
3880 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3881 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
3882 xfree_sized(buffer, buffer_size);
3883 return 0.0;
3884 }
3885
3886 // If errno is set, then it should only be ERANGE. At this point we need to
3887 // check if it's infinity (it should be).
3888 if (errno == ERANGE && PRISM_ISINF(value)) {
3889 int warn_width;
3890 const char *ellipsis;
3891
3892 if (length > 20) {
3893 warn_width = 20;
3894 ellipsis = "...";
3895 } else {
3896 warn_width = (int) length;
3897 ellipsis = "";
3898 }
3899
3900 pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3901 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3902 }
3903
3904 // Finally we can free the buffer and return the value.
3905 xfree_sized(buffer, buffer_size);
3906 return value;
3907}
3908
3912static pm_float_node_t *
3913pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3914 assert(token->type == PM_TOKEN_FLOAT);
3915
3916 return pm_float_node_new(
3917 parser->arena,
3918 ++parser->node_id,
3919 PM_NODE_FLAG_STATIC_LITERAL,
3920 PM_LOCATION_INIT_TOKEN(parser, token),
3921 pm_double_parse(parser, token)
3922 );
3923}
3924
3928static pm_imaginary_node_t *
3929pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3930 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3931
3932 return pm_imaginary_node_new(
3933 parser->arena,
3934 ++parser->node_id,
3935 PM_NODE_FLAG_STATIC_LITERAL,
3936 PM_LOCATION_INIT_TOKEN(parser, token),
3937 UP(pm_float_node_create(parser, &((pm_token_t) {
3938 .type = PM_TOKEN_FLOAT,
3939 .start = token->start,
3940 .end = token->end - 1
3941 })))
3942 );
3943}
3944
3948static pm_rational_node_t *
3949pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3950 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3951
3952 pm_rational_node_t *node = pm_rational_node_new(
3953 parser->arena,
3954 ++parser->node_id,
3955 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
3956 PM_LOCATION_INIT_TOKEN(parser, token),
3957 ((pm_integer_t) { 0 }),
3958 ((pm_integer_t) { 0 })
3959 );
3960
3961 const uint8_t *start = token->start;
3962 const uint8_t *end = token->end - 1; // r
3963
3964 while (start < end && *start == '0') start++; // 0.1 -> .1
3965 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3966
3967 size_t length = (size_t) (end - start);
3968 if (length == 1) {
3969 node->denominator.value = 1;
3970 return node;
3971 }
3972
3973 const uint8_t *point = memchr(start, '.', length);
3974 assert(point && "should have a decimal point");
3975
3976 uint8_t *digits = xmalloc(length);
3977 if (digits == NULL) {
3978 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
3979 abort();
3980 }
3981
3982 memcpy(digits, start, (unsigned long) (point - start));
3983 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3984 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
3985
3986 size_t fract_length = 0;
3987 for (const uint8_t *fract = point; fract < end; ++fract) {
3988 if (*fract != '_') ++fract_length;
3989 }
3990 digits[0] = '1';
3991 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
3992 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
3993 xfree_sized(digits, length);
3994
3995 pm_integers_reduce(&node->numerator, &node->denominator);
3996 pm_integer_arena_move(parser->arena, &node->numerator);
3997 pm_integer_arena_move(parser->arena, &node->denominator);
3998 return node;
3999}
4000
4005static pm_imaginary_node_t *
4006pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4007 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4008
4009 return pm_imaginary_node_new(
4010 parser->arena,
4011 ++parser->node_id,
4012 PM_NODE_FLAG_STATIC_LITERAL,
4013 PM_LOCATION_INIT_TOKEN(parser, token),
4014 UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4015 .type = PM_TOKEN_FLOAT_RATIONAL,
4016 .start = token->start,
4017 .end = token->end - 1
4018 })))
4019 );
4020}
4021
4025static pm_for_node_t *
4026pm_for_node_create(
4027 pm_parser_t *parser,
4028 pm_node_t *index,
4029 pm_node_t *collection,
4030 pm_statements_node_t *statements,
4031 const pm_token_t *for_keyword,
4032 const pm_token_t *in_keyword,
4033 const pm_token_t *do_keyword,
4034 const pm_token_t *end_keyword
4035) {
4036 return pm_for_node_new(
4037 parser->arena,
4038 ++parser->node_id,
4039 0,
4040 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4041 index,
4042 collection,
4043 statements,
4044 TOK2LOC(parser, for_keyword),
4045 TOK2LOC(parser, in_keyword),
4046 NTOK2LOC(parser, do_keyword),
4047 TOK2LOC(parser, end_keyword)
4048 );
4049}
4050
4054static pm_forwarding_arguments_node_t *
4055pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4056 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4057
4058 return pm_forwarding_arguments_node_new(
4059 parser->arena,
4060 ++parser->node_id,
4061 0,
4062 PM_LOCATION_INIT_TOKEN(parser, token)
4063 );
4064}
4065
4069static pm_forwarding_parameter_node_t *
4070pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4071 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4072
4073 return pm_forwarding_parameter_node_new(
4074 parser->arena,
4075 ++parser->node_id,
4076 0,
4077 PM_LOCATION_INIT_TOKEN(parser, token)
4078 );
4079}
4080
4084static pm_forwarding_super_node_t *
4085pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4086 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4087 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4088
4089 pm_block_node_t *block = NULL;
4090 if (arguments->block != NULL) {
4091 block = (pm_block_node_t *) arguments->block;
4092 }
4093
4094 return pm_forwarding_super_node_new(
4095 parser->arena,
4096 ++parser->node_id,
4097 0,
4098 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
4099 block
4100 );
4101}
4102
4107static pm_hash_pattern_node_t *
4108pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4109 return pm_hash_pattern_node_new(
4110 parser->arena,
4111 ++parser->node_id,
4112 0,
4113 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4114 NULL,
4115 ((pm_node_list_t) { 0 }),
4116 NULL,
4117 TOK2LOC(parser, opening),
4118 TOK2LOC(parser, closing)
4119 );
4120}
4121
4125static pm_hash_pattern_node_t *
4126pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4127 uint32_t start;
4128 uint32_t end;
4129
4130 if (elements->size > 0) {
4131 if (rest) {
4132 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4133 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4134 } else {
4135 start = PM_NODE_START(elements->nodes[0]);
4136 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4137 }
4138 } else {
4139 assert(rest != NULL);
4140 start = PM_NODE_START(rest);
4141 end = PM_NODE_END(rest);
4142 }
4143
4144 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4145 parser->arena,
4146 ++parser->node_id,
4147 0,
4148 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4149 NULL,
4150 ((pm_node_list_t) { 0 }),
4151 rest,
4152 ((pm_location_t) { 0 }),
4153 ((pm_location_t) { 0 })
4154 );
4155
4156 pm_node_list_concat(parser->arena, &node->elements, elements);
4157 return node;
4158}
4159
4163static pm_constant_id_t
4164pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4165 switch (PM_NODE_TYPE(target)) {
4166 case PM_GLOBAL_VARIABLE_READ_NODE:
4167 return ((pm_global_variable_read_node_t *) target)->name;
4168 case PM_BACK_REFERENCE_READ_NODE:
4169 return ((pm_back_reference_read_node_t *) target)->name;
4170 case PM_NUMBERED_REFERENCE_READ_NODE:
4171 // This will only ever happen in the event of a syntax error, but we
4172 // still need to provide something for the node.
4173 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4174 default:
4175 assert(false && "unreachable");
4176 return (pm_constant_id_t) -1;
4177 }
4178}
4179
4183static pm_global_variable_and_write_node_t *
4184pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4185 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4186
4187 return pm_global_variable_and_write_node_new(
4188 parser->arena,
4189 ++parser->node_id,
4190 0,
4191 PM_LOCATION_INIT_NODES(target, value),
4192 pm_global_variable_write_name(parser, target),
4193 target->location,
4194 TOK2LOC(parser, operator),
4195 value
4196 );
4197}
4198
4202static pm_global_variable_operator_write_node_t *
4203pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4204 return pm_global_variable_operator_write_node_new(
4205 parser->arena,
4206 ++parser->node_id,
4207 0,
4208 PM_LOCATION_INIT_NODES(target, value),
4209 pm_global_variable_write_name(parser, target),
4210 target->location,
4211 TOK2LOC(parser, operator),
4212 value,
4213 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4214 );
4215}
4216
4220static pm_global_variable_or_write_node_t *
4221pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4222 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4223
4224 return pm_global_variable_or_write_node_new(
4225 parser->arena,
4226 ++parser->node_id,
4227 0,
4228 PM_LOCATION_INIT_NODES(target, value),
4229 pm_global_variable_write_name(parser, target),
4230 target->location,
4231 TOK2LOC(parser, operator),
4232 value
4233 );
4234}
4235
4239static pm_global_variable_read_node_t *
4240pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4241 return pm_global_variable_read_node_new(
4242 parser->arena,
4243 ++parser->node_id,
4244 0,
4245 PM_LOCATION_INIT_TOKEN(parser, name),
4246 pm_parser_constant_id_token(parser, name)
4247 );
4248}
4249
4253static pm_global_variable_read_node_t *
4254pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4255 return pm_global_variable_read_node_new(
4256 parser->arena,
4257 ++parser->node_id,
4258 0,
4259 PM_LOCATION_INIT_UNSET,
4260 name
4261 );
4262}
4263
4267static pm_global_variable_write_node_t *
4268pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4269 return pm_global_variable_write_node_new(
4270 parser->arena,
4271 ++parser->node_id,
4272 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4273 PM_LOCATION_INIT_NODES(target, value),
4274 pm_global_variable_write_name(parser, target),
4275 target->location,
4276 value,
4277 TOK2LOC(parser, operator)
4278 );
4279}
4280
4284static pm_global_variable_write_node_t *
4285pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4286 return pm_global_variable_write_node_new(
4287 parser->arena,
4288 ++parser->node_id,
4289 0,
4290 PM_LOCATION_INIT_UNSET,
4291 name,
4292 ((pm_location_t) { 0 }),
4293 value,
4294 ((pm_location_t) { 0 })
4295 );
4296}
4297
4301static pm_hash_node_t *
4302pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4303 assert(opening != NULL);
4304
4305 return pm_hash_node_new(
4306 parser->arena,
4307 ++parser->node_id,
4308 PM_NODE_FLAG_STATIC_LITERAL,
4309 PM_LOCATION_INIT_TOKEN(parser, opening),
4310 TOK2LOC(parser, opening),
4311 ((pm_node_list_t) { 0 }),
4312 ((pm_location_t) { 0 })
4313 );
4314}
4315
4319static inline void
4320pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4321 pm_node_list_append(arena, &hash->elements, element);
4322
4323 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4324 if (static_literal) {
4325 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4326 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4327 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4328 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4329 }
4330
4331 if (!static_literal) {
4332 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4333 }
4334}
4335
4336static inline void
4337pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4338 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4339 hash->closing_loc = TOK2LOC(parser, token);
4340}
4341
4345static pm_if_node_t *
4346pm_if_node_create(pm_parser_t *parser,
4347 const pm_token_t *if_keyword,
4348 pm_node_t *predicate,
4349 const pm_token_t *then_keyword,
4350 pm_statements_node_t *statements,
4351 pm_node_t *subsequent,
4352 const pm_token_t *end_keyword
4353) {
4354 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4355
4356 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4357 uint32_t end;
4358
4359 if (end_keyword != NULL) {
4360 end = PM_TOKEN_END(parser, end_keyword);
4361 } else if (subsequent != NULL) {
4362 end = PM_NODE_END(subsequent);
4363 } else if (pm_statements_node_body_length(statements) != 0) {
4364 end = PM_NODE_END(statements);
4365 } else {
4366 end = PM_NODE_END(predicate);
4367 }
4368
4369 return pm_if_node_new(
4370 parser->arena,
4371 ++parser->node_id,
4372 PM_NODE_FLAG_NEWLINE,
4373 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4374 TOK2LOC(parser, if_keyword),
4375 predicate,
4376 NTOK2LOC(parser, then_keyword),
4377 statements,
4378 subsequent,
4379 NTOK2LOC(parser, end_keyword)
4380 );
4381}
4382
4386static pm_if_node_t *
4387pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4388 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4389
4390 pm_statements_node_t *statements = pm_statements_node_create(parser);
4391 pm_statements_node_body_append(parser, statements, statement, true);
4392
4393 return pm_if_node_new(
4394 parser->arena,
4395 ++parser->node_id,
4396 PM_NODE_FLAG_NEWLINE,
4397 PM_LOCATION_INIT_NODES(statement, predicate),
4398 TOK2LOC(parser, if_keyword),
4399 predicate,
4400 ((pm_location_t) { 0 }),
4401 statements,
4402 NULL,
4403 ((pm_location_t) { 0 })
4404 );
4405}
4406
4410static pm_if_node_t *
4411pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4412 pm_assert_value_expression(parser, predicate);
4413 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4414
4415 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4416 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4417
4418 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4419 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4420
4421 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4422 return pm_if_node_new(
4423 parser->arena,
4424 ++parser->node_id,
4425 PM_NODE_FLAG_NEWLINE,
4426 PM_LOCATION_INIT_NODES(predicate, false_expression),
4427 ((pm_location_t) { 0 }),
4428 predicate,
4429 TOK2LOC(parser, qmark),
4430 if_statements,
4431 UP(else_node),
4432 ((pm_location_t) { 0 })
4433 );
4434}
4435
4436static inline void
4437pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4438 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4439 node->end_keyword_loc = TOK2LOC(parser, keyword);
4440}
4441
4442static inline void
4443pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4444 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4445 node->end_keyword_loc = TOK2LOC(parser, keyword);
4446}
4447
4451static pm_implicit_node_t *
4452pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4453 return pm_implicit_node_new(
4454 parser->arena,
4455 ++parser->node_id,
4456 0,
4457 PM_LOCATION_INIT_NODE(value),
4458 value
4459 );
4460}
4461
4465static pm_implicit_rest_node_t *
4466pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4467 assert(token->type == PM_TOKEN_COMMA);
4468
4469 return pm_implicit_rest_node_new(
4470 parser->arena,
4471 ++parser->node_id,
4472 0,
4473 PM_LOCATION_INIT_TOKEN(parser, token)
4474 );
4475}
4476
4480static pm_integer_node_t *
4481pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4482 assert(token->type == PM_TOKEN_INTEGER);
4483
4484 pm_integer_node_t *node = pm_integer_node_new(
4485 parser->arena,
4486 ++parser->node_id,
4487 base | PM_NODE_FLAG_STATIC_LITERAL,
4488 PM_LOCATION_INIT_TOKEN(parser, token),
4489 ((pm_integer_t) { 0 })
4490 );
4491
4492 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4493 switch (base) {
4494 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4495 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4496 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4497 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4498 default: assert(false && "unreachable"); break;
4499 }
4500
4501 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4502 pm_integer_arena_move(parser->arena, &node->value);
4503 return node;
4504}
4505
4510static pm_imaginary_node_t *
4511pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4512 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4513
4514 return pm_imaginary_node_new(
4515 parser->arena,
4516 ++parser->node_id,
4517 PM_NODE_FLAG_STATIC_LITERAL,
4518 PM_LOCATION_INIT_TOKEN(parser, token),
4519 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4520 .type = PM_TOKEN_INTEGER,
4521 .start = token->start,
4522 .end = token->end - 1
4523 })))
4524 );
4525}
4526
4531static pm_rational_node_t *
4532pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4533 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4534
4535 pm_rational_node_t *node = pm_rational_node_new(
4536 parser->arena,
4537 ++parser->node_id,
4538 base | PM_NODE_FLAG_STATIC_LITERAL,
4539 PM_LOCATION_INIT_TOKEN(parser, token),
4540 ((pm_integer_t) { 0 }),
4541 ((pm_integer_t) { .value = 1 })
4542 );
4543
4544 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4545 switch (base) {
4546 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4547 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4548 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4549 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4550 default: assert(false && "unreachable"); break;
4551 }
4552
4553 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4554 pm_integer_arena_move(parser->arena, &node->numerator);
4555
4556 return node;
4557}
4558
4563static pm_imaginary_node_t *
4564pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4565 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4566
4567 return pm_imaginary_node_new(
4568 parser->arena,
4569 ++parser->node_id,
4570 PM_NODE_FLAG_STATIC_LITERAL,
4571 PM_LOCATION_INIT_TOKEN(parser, token),
4572 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4573 .type = PM_TOKEN_INTEGER_RATIONAL,
4574 .start = token->start,
4575 .end = token->end - 1
4576 })))
4577 );
4578}
4579
4583static pm_in_node_t *
4584pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4585 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4586 uint32_t end;
4587
4588 if (statements != NULL) {
4589 end = PM_NODE_END(statements);
4590 } else if (then_keyword != NULL) {
4591 end = PM_TOKEN_END(parser, then_keyword);
4592 } else {
4593 end = PM_NODE_END(pattern);
4594 }
4595
4596 return pm_in_node_new(
4597 parser->arena,
4598 ++parser->node_id,
4599 0,
4600 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4601 pattern,
4602 statements,
4603 TOK2LOC(parser, in_keyword),
4604 NTOK2LOC(parser, then_keyword)
4605 );
4606}
4607
4611static pm_instance_variable_and_write_node_t *
4612pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4613 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4614
4615 return pm_instance_variable_and_write_node_new(
4616 parser->arena,
4617 ++parser->node_id,
4618 0,
4619 PM_LOCATION_INIT_NODES(target, value),
4620 target->name,
4621 target->base.location,
4622 TOK2LOC(parser, operator),
4623 value
4624 );
4625}
4626
4630static pm_instance_variable_operator_write_node_t *
4631pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4632 return pm_instance_variable_operator_write_node_new(
4633 parser->arena,
4634 ++parser->node_id,
4635 0,
4636 PM_LOCATION_INIT_NODES(target, value),
4637 target->name,
4638 target->base.location,
4639 TOK2LOC(parser, operator),
4640 value,
4641 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4642 );
4643}
4644
4648static pm_instance_variable_or_write_node_t *
4649pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4650 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4651
4652 return pm_instance_variable_or_write_node_new(
4653 parser->arena,
4654 ++parser->node_id,
4655 0,
4656 PM_LOCATION_INIT_NODES(target, value),
4657 target->name,
4658 target->base.location,
4659 TOK2LOC(parser, operator),
4660 value
4661 );
4662}
4663
4667static pm_instance_variable_read_node_t *
4668pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4669 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4670
4671 return pm_instance_variable_read_node_new(
4672 parser->arena,
4673 ++parser->node_id,
4674 0,
4675 PM_LOCATION_INIT_TOKEN(parser, token),
4676 pm_parser_constant_id_token(parser, token)
4677 );
4678}
4679
4684static pm_instance_variable_write_node_t *
4685pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4686 return pm_instance_variable_write_node_new(
4687 parser->arena,
4688 ++parser->node_id,
4689 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4690 PM_LOCATION_INIT_NODES(read_node, value),
4691 read_node->name,
4692 read_node->base.location,
4693 value,
4694 TOK2LOC(parser, operator)
4695 );
4696}
4697
4703static void
4704pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4705 switch (PM_NODE_TYPE(part)) {
4706 case PM_STRING_NODE:
4707 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4708 break;
4709 case PM_EMBEDDED_STATEMENTS_NODE: {
4710 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4711 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4712
4713 if (embedded == NULL) {
4714 // If there are no statements or more than one statement, then
4715 // we lose the static literal flag.
4716 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4717 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4718 // If the embedded statement is a string, then we can keep the
4719 // static literal flag and mark the string as frozen.
4720 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4721 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4722 // If the embedded statement is an interpolated string and it's
4723 // a static literal, then we can keep the static literal flag.
4724 } else {
4725 // Otherwise we lose the static literal flag.
4726 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4727 }
4728
4729 break;
4730 }
4731 case PM_EMBEDDED_VARIABLE_NODE:
4732 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4733 break;
4734 default:
4735 assert(false && "unexpected node type");
4736 break;
4737 }
4738
4739 pm_node_list_append(arena, parts, part);
4740}
4741
4745static pm_interpolated_regular_expression_node_t *
4746pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4747 return pm_interpolated_regular_expression_node_new(
4748 parser->arena,
4749 ++parser->node_id,
4750 PM_NODE_FLAG_STATIC_LITERAL,
4751 PM_LOCATION_INIT_TOKEN(parser, opening),
4752 TOK2LOC(parser, opening),
4753 ((pm_node_list_t) { 0 }),
4754 TOK2LOC(parser, opening)
4755 );
4756}
4757
4758static inline void
4759pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4760 if (PM_NODE_START(node) > PM_NODE_START(part)) {
4761 PM_NODE_START_SET_NODE(node, part);
4762 }
4763 if (PM_NODE_END(node) < PM_NODE_END(part)) {
4764 PM_NODE_LENGTH_SET_NODE(node, part);
4765 }
4766
4767 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
4768}
4769
4770static inline void
4771pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4772 node->closing_loc = TOK2LOC(parser, closing);
4773 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4774 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4775}
4776
4800static inline void
4801pm_interpolated_string_node_append(pm_arena_t *arena, pm_interpolated_string_node_t *node, pm_node_t *part) {
4802#define CLEAR_FLAGS(node) \
4803 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4804
4805#define MUTABLE_FLAGS(node) \
4806 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4807
4808 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4809 PM_NODE_START_SET_NODE(node, part);
4810 }
4811
4812 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4813 PM_NODE_LENGTH_SET_NODE(node, part);
4814 }
4815
4816 switch (PM_NODE_TYPE(part)) {
4817 case PM_STRING_NODE:
4818 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4819 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4820 // as long as this interpolation only consists of other string literals.
4821 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4822 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4823 }
4824 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4825 break;
4826 case PM_INTERPOLATED_STRING_NODE:
4827 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4828 // If the string that we're concatenating is a static literal,
4829 // then we can keep the static literal flag for this string.
4830 } else {
4831 // Otherwise, we lose the static literal flag here and we should
4832 // also clear the mutability flags.
4833 CLEAR_FLAGS(node);
4834 }
4835 break;
4836 case PM_EMBEDDED_STATEMENTS_NODE: {
4837 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4838 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4839
4840 if (embedded == NULL) {
4841 // If we're embedding multiple statements or no statements, then
4842 // the string is not longer a static literal.
4843 CLEAR_FLAGS(node);
4844 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4845 // If the embedded statement is a string, then we can make that
4846 // string as frozen and static literal, and not touch the static
4847 // literal status of this string.
4848 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4849
4850 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4851 MUTABLE_FLAGS(node);
4852 }
4853 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4854 // If the embedded statement is an interpolated string, but that
4855 // string is marked as static literal, then we can keep our
4856 // static literal status for this string.
4857 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4858 MUTABLE_FLAGS(node);
4859 }
4860 } else {
4861 // In all other cases, we lose the static literal flag here and
4862 // become mutable.
4863 CLEAR_FLAGS(node);
4864 }
4865
4866 break;
4867 }
4868 case PM_EMBEDDED_VARIABLE_NODE:
4869 // Embedded variables clear static literal, which means we also
4870 // should clear the mutability flags.
4871 CLEAR_FLAGS(node);
4872 break;
4873 case PM_X_STRING_NODE:
4874 case PM_INTERPOLATED_X_STRING_NODE:
4875 case PM_SYMBOL_NODE:
4876 case PM_INTERPOLATED_SYMBOL_NODE:
4877 // These will only happen in error cases. But we want to handle it
4878 // here so that we don't fail the assertion.
4879 CLEAR_FLAGS(node);
4880 break;
4881 default:
4882 assert(false && "unexpected node type");
4883 break;
4884 }
4885
4886 pm_node_list_append(arena, &node->parts, part);
4887
4888#undef CLEAR_FLAGS
4889#undef MUTABLE_FLAGS
4890}
4891
4895static pm_interpolated_string_node_t *
4896pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4897 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4898
4899 switch (parser->frozen_string_literal) {
4900 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4901 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4902 break;
4903 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4904 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4905 break;
4906 }
4907
4908 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4909 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4910
4911 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
4912 parser->arena,
4913 ++parser->node_id,
4914 flags,
4915 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4916 NTOK2LOC(parser, opening),
4917 ((pm_node_list_t) { 0 }),
4918 NTOK2LOC(parser, closing)
4919 );
4920
4921 if (parts != NULL) {
4922 pm_node_t *part;
4923 PM_NODE_LIST_FOREACH(parts, index, part) {
4924 pm_interpolated_string_node_append(parser->arena, node, part);
4925 }
4926 }
4927
4928 return node;
4929}
4930
4934static void
4935pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4936 node->closing_loc = TOK2LOC(parser, closing);
4937 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4938}
4939
4940static void
4941pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4942 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4943 PM_NODE_START_SET_NODE(node, part);
4944 }
4945
4946 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
4947
4948 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4949 PM_NODE_LENGTH_SET_NODE(node, part);
4950 }
4951}
4952
4953static void
4954pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4955 node->closing_loc = TOK2LOC(parser, closing);
4956 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4957}
4958
4962static pm_interpolated_symbol_node_t *
4963pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4964 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4965 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4966
4967 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
4968 parser->arena,
4969 ++parser->node_id,
4970 PM_NODE_FLAG_STATIC_LITERAL,
4971 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4972 NTOK2LOC(parser, opening),
4973 ((pm_node_list_t) { 0 }),
4974 NTOK2LOC(parser, closing)
4975 );
4976
4977 if (parts != NULL) {
4978 pm_node_t *part;
4979 PM_NODE_LIST_FOREACH(parts, index, part) {
4980 pm_interpolated_symbol_node_append(parser->arena, node, part);
4981 }
4982 }
4983
4984 return node;
4985}
4986
4990static pm_interpolated_x_string_node_t *
4991pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4992 return pm_interpolated_x_string_node_new(
4993 parser->arena,
4994 ++parser->node_id,
4995 0,
4996 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4997 TOK2LOC(parser, opening),
4998 ((pm_node_list_t) { 0 }),
4999 TOK2LOC(parser, closing)
5000 );
5001}
5002
5003static inline void
5004pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5005 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5006 PM_NODE_LENGTH_SET_NODE(node, part);
5007}
5008
5009static inline void
5010pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5011 node->closing_loc = TOK2LOC(parser, closing);
5012 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5013}
5014
5018static pm_it_local_variable_read_node_t *
5019pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5020 return pm_it_local_variable_read_node_new(
5021 parser->arena,
5022 ++parser->node_id,
5023 0,
5024 PM_LOCATION_INIT_TOKEN(parser, name)
5025 );
5026}
5027
5031static pm_it_parameters_node_t *
5032pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5033 return pm_it_parameters_node_new(
5034 parser->arena,
5035 ++parser->node_id,
5036 0,
5037 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
5038 );
5039}
5040
5044static pm_keyword_hash_node_t *
5045pm_keyword_hash_node_create(pm_parser_t *parser) {
5046 return pm_keyword_hash_node_new(
5047 parser->arena,
5048 ++parser->node_id,
5049 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5050 PM_LOCATION_INIT_UNSET,
5051 ((pm_node_list_t) { 0 })
5052 );
5053}
5054
5058static void
5059pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5060 // If the element being added is not an AssocNode or does not have a symbol
5061 // key, then we want to turn the SYMBOL_KEYS flag off.
5062 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5063 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5064 }
5065
5066 pm_node_list_append(arena, &hash->elements, element);
5067 if (PM_NODE_LENGTH(hash) == 0) {
5068 PM_NODE_START_SET_NODE(hash, element);
5069 }
5070 PM_NODE_LENGTH_SET_NODE(hash, element);
5071}
5072
5076static pm_required_keyword_parameter_node_t *
5077pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5078 return pm_required_keyword_parameter_node_new(
5079 parser->arena,
5080 ++parser->node_id,
5081 0,
5082 PM_LOCATION_INIT_TOKEN(parser, name),
5083 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5084 TOK2LOC(parser, name)
5085 );
5086}
5087
5091static pm_optional_keyword_parameter_node_t *
5092pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5093 return pm_optional_keyword_parameter_node_new(
5094 parser->arena,
5095 ++parser->node_id,
5096 0,
5097 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5098 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5099 TOK2LOC(parser, name),
5100 value
5101 );
5102}
5103
5107static pm_keyword_rest_parameter_node_t *
5108pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5109 return pm_keyword_rest_parameter_node_new(
5110 parser->arena,
5111 ++parser->node_id,
5112 0,
5113 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5114 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5115 NTOK2LOC(parser, name),
5116 TOK2LOC(parser, operator)
5117 );
5118}
5119
5123static pm_lambda_node_t *
5124pm_lambda_node_create(
5125 pm_parser_t *parser,
5126 pm_constant_id_list_t *locals,
5127 const pm_token_t *operator,
5128 const pm_token_t *opening,
5129 const pm_token_t *closing,
5130 pm_node_t *parameters,
5131 pm_node_t *body
5132) {
5133 return pm_lambda_node_new(
5134 parser->arena,
5135 ++parser->node_id,
5136 0,
5137 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5138 *locals,
5139 TOK2LOC(parser, operator),
5140 TOK2LOC(parser, opening),
5141 TOK2LOC(parser, closing),
5142 parameters,
5143 body
5144 );
5145}
5146
5150static pm_local_variable_and_write_node_t *
5151pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5152 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5153 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5154
5155 return pm_local_variable_and_write_node_new(
5156 parser->arena,
5157 ++parser->node_id,
5158 0,
5159 PM_LOCATION_INIT_NODES(target, value),
5160 target->location,
5161 TOK2LOC(parser, operator),
5162 value,
5163 name,
5164 depth
5165 );
5166}
5167
5171static pm_local_variable_operator_write_node_t *
5172pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5173 return pm_local_variable_operator_write_node_new(
5174 parser->arena,
5175 ++parser->node_id,
5176 0,
5177 PM_LOCATION_INIT_NODES(target, value),
5178 target->location,
5179 TOK2LOC(parser, operator),
5180 value,
5181 name,
5182 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5183 depth
5184 );
5185}
5186
5190static pm_local_variable_or_write_node_t *
5191pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5192 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5193 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5194
5195 return pm_local_variable_or_write_node_new(
5196 parser->arena,
5197 ++parser->node_id,
5198 0,
5199 PM_LOCATION_INIT_NODES(target, value),
5200 target->location,
5201 TOK2LOC(parser, operator),
5202 value,
5203 name,
5204 depth
5205 );
5206}
5207
5211static pm_local_variable_read_node_t *
5212pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5213 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5214
5215 return pm_local_variable_read_node_new(
5216 parser->arena,
5217 ++parser->node_id,
5218 0,
5219 PM_LOCATION_INIT_TOKEN(parser, name),
5220 name_id,
5221 depth
5222 );
5223}
5224
5228static pm_local_variable_read_node_t *
5229pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5230 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5231 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5232}
5233
5238static pm_local_variable_read_node_t *
5239pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5240 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5241 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5242}
5243
5247static pm_local_variable_write_node_t *
5248pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5249 return pm_local_variable_write_node_new(
5250 parser->arena,
5251 ++parser->node_id,
5252 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5253 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5254 name,
5255 depth,
5256 *name_loc,
5257 value,
5258 TOK2LOC(parser, operator)
5259 );
5260}
5261
/**
 * Returns true if the bytes in [start, end) are exactly the two characters
 * `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5269
5274static inline bool
5275pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5276 return (
5277 (length == 2) &&
5278 (parser->start[start] == '_') &&
5279 (parser->start[start + 1] != '0') &&
5280 pm_char_is_decimal_digit(parser->start[start + 1])
5281 );
5282}
5283
5288static inline void
5289pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5290 if (pm_token_is_numbered_parameter(parser, start, length)) {
5291 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5292 }
5293}
5294
5299static pm_local_variable_target_node_t *
5300pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5301 pm_refute_numbered_parameter(parser, location->start, location->length);
5302
5303 return pm_local_variable_target_node_new(
5304 parser->arena,
5305 ++parser->node_id,
5306 0,
5307 ((pm_location_t) { .start = location->start, .length = location->length }),
5308 name,
5309 depth
5310 );
5311}
5312
5316static pm_match_predicate_node_t *
5317pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5318 pm_assert_value_expression(parser, value);
5319
5320 return pm_match_predicate_node_new(
5321 parser->arena,
5322 ++parser->node_id,
5323 0,
5324 PM_LOCATION_INIT_NODES(value, pattern),
5325 value,
5326 pattern,
5327 TOK2LOC(parser, operator)
5328 );
5329}
5330
5334static pm_match_required_node_t *
5335pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5336 pm_assert_value_expression(parser, value);
5337
5338 return pm_match_required_node_new(
5339 parser->arena,
5340 ++parser->node_id,
5341 0,
5342 PM_LOCATION_INIT_NODES(value, pattern),
5343 value,
5344 pattern,
5345 TOK2LOC(parser, operator)
5346 );
5347}
5348
5352static pm_match_write_node_t *
5353pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5354 return pm_match_write_node_new(
5355 parser->arena,
5356 ++parser->node_id,
5357 0,
5358 PM_LOCATION_INIT_NODE(call),
5359 call,
5360 ((pm_node_list_t) { 0 })
5361 );
5362}
5363
5367static pm_module_node_t *
5368pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5369 return pm_module_node_new(
5370 parser->arena,
5371 ++parser->node_id,
5372 0,
5373 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5374 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5375 TOK2LOC(parser, module_keyword),
5376 constant_path,
5377 body,
5378 TOK2LOC(parser, end_keyword),
5379 pm_parser_constant_id_token(parser, name)
5380 );
5381}
5382
5386static pm_multi_target_node_t *
5387pm_multi_target_node_create(pm_parser_t *parser) {
5388 return pm_multi_target_node_new(
5389 parser->arena,
5390 ++parser->node_id,
5391 0,
5392 PM_LOCATION_INIT_UNSET,
5393 ((pm_node_list_t) { 0 }),
5394 NULL,
5395 ((pm_node_list_t) { 0 }),
5396 ((pm_location_t) { 0 }),
5397 ((pm_location_t) { 0 })
5398 );
5399}
5400
5404static void
5405pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5406 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5407 if (node->rest == NULL) {
5408 node->rest = target;
5409 } else {
5410 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5411 pm_node_list_append(parser->arena, &node->rights, target);
5412 }
5413 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5414 if (node->rest == NULL) {
5415 node->rest = target;
5416 } else {
5417 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5418 pm_node_list_append(parser->arena, &node->rights, target);
5419 }
5420 } else if (node->rest == NULL) {
5421 pm_node_list_append(parser->arena, &node->lefts, target);
5422 } else {
5423 pm_node_list_append(parser->arena, &node->rights, target);
5424 }
5425
5426 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5427 PM_NODE_START_SET_NODE(node, target);
5428 }
5429
5430 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5431 PM_NODE_LENGTH_SET_NODE(node, target);
5432 }
5433}
5434
5438static void
5439pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5440 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5441 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5442 node->lparen_loc = TOK2LOC(parser, lparen);
5443}
5444
5448static void
5449pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5450 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5451 node->rparen_loc = TOK2LOC(parser, rparen);
5452}
5453
5457static pm_multi_write_node_t *
5458pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5459 /* The target is no longer necessary because we have reused its children. It
5460 * is arena-allocated so no explicit free is needed. */
5461 return pm_multi_write_node_new(
5462 parser->arena,
5463 ++parser->node_id,
5464 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5465 PM_LOCATION_INIT_NODES(target, value),
5466 target->lefts,
5467 target->rest,
5468 target->rights,
5469 target->lparen_loc,
5470 target->rparen_loc,
5471 TOK2LOC(parser, operator),
5472 value
5473 );
5474}
5475
5479static pm_next_node_t *
5480pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5481 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5482
5483 return pm_next_node_new(
5484 parser->arena,
5485 ++parser->node_id,
5486 0,
5487 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5488 arguments,
5489 TOK2LOC(parser, keyword)
5490 );
5491}
5492
5496static pm_nil_node_t *
5497pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5498 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5499
5500 return pm_nil_node_new(
5501 parser->arena,
5502 ++parser->node_id,
5503 PM_NODE_FLAG_STATIC_LITERAL,
5504 PM_LOCATION_INIT_TOKEN(parser, token)
5505 );
5506}
5507
5511static pm_no_block_parameter_node_t *
5512pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5513 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5514 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5515
5516 return pm_no_block_parameter_node_new(
5517 parser->arena,
5518 ++parser->node_id,
5519 0,
5520 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5521 TOK2LOC(parser, operator),
5522 TOK2LOC(parser, keyword)
5523 );
5524}
5525
5529static pm_no_keywords_parameter_node_t *
5530pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5531 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5532 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5533
5534 return pm_no_keywords_parameter_node_new(
5535 parser->arena,
5536 ++parser->node_id,
5537 0,
5538 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5539 TOK2LOC(parser, operator),
5540 TOK2LOC(parser, keyword)
5541 );
5542}
5543
5547static pm_numbered_parameters_node_t *
5548pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5549 return pm_numbered_parameters_node_new(
5550 parser->arena,
5551 ++parser->node_id,
5552 0,
5553 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5554 maximum
5555 );
5556}
5557
5562#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5563
5570static uint32_t
5571pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5572 const uint8_t *start = token->start + 1;
5573 const uint8_t *end = token->end;
5574
5575 ptrdiff_t diff = end - start;
5576 assert(diff > 0);
5577#if PTRDIFF_MAX > SIZE_MAX
5578 assert(diff < (ptrdiff_t) SIZE_MAX);
5579#endif
5580 size_t length = (size_t) diff;
5581
5582 char *digits = xcalloc(length + 1, sizeof(char));
5583 memcpy(digits, start, length);
5584 digits[length] = '\0';
5585
5586 char *endptr;
5587 errno = 0;
5588 unsigned long value = strtoul(digits, &endptr, 10);
5589
5590 if ((digits == endptr) || (*endptr != '\0')) {
5591 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5592 value = 0;
5593 }
5594
5595 xfree_sized(digits, sizeof(char) * (length + 1));
5596
5597 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5598 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5599 value = 0;
5600 }
5601
5602 return (uint32_t) value;
5603}
5604
5605#undef NTH_REF_MAX
5606
5610static pm_numbered_reference_read_node_t *
5611pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5612 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5613
5614 return pm_numbered_reference_read_node_new(
5615 parser->arena,
5616 ++parser->node_id,
5617 0,
5618 PM_LOCATION_INIT_TOKEN(parser, name),
5619 pm_numbered_reference_read_node_number(parser, name)
5620 );
5621}
5622
5626static pm_optional_parameter_node_t *
5627pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5628 return pm_optional_parameter_node_new(
5629 parser->arena,
5630 ++parser->node_id,
5631 0,
5632 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5633 pm_parser_constant_id_token(parser, name),
5634 TOK2LOC(parser, name),
5635 TOK2LOC(parser, operator),
5636 value
5637 );
5638}
5639
5643static pm_or_node_t *
5644pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5645 pm_assert_value_expression(parser, left);
5646
5647 return pm_or_node_new(
5648 parser->arena,
5649 ++parser->node_id,
5650 0,
5651 PM_LOCATION_INIT_NODES(left, right),
5652 left,
5653 right,
5654 TOK2LOC(parser, operator)
5655 );
5656}
5657
5661static pm_parameters_node_t *
5662pm_parameters_node_create(pm_parser_t *parser) {
5663 return pm_parameters_node_new(
5664 parser->arena,
5665 ++parser->node_id,
5666 0,
5667 PM_LOCATION_INIT_UNSET,
5668 ((pm_node_list_t) { 0 }),
5669 ((pm_node_list_t) { 0 }),
5670 NULL,
5671 ((pm_node_list_t) { 0 }),
5672 ((pm_node_list_t) { 0 }),
5673 NULL,
5674 NULL
5675 );
5676}
5677
5681static void
5682pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5683 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5684 PM_NODE_START_SET_NODE(params, param);
5685 }
5686
5687 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5688 PM_NODE_LENGTH_SET_NODE(params, param);
5689 }
5690}
5691
5695static void
5696pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5697 pm_parameters_node_location_set(params, param);
5698 pm_node_list_append(arena, &params->requireds, param);
5699}
5700
5704static void
5705pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5706 pm_parameters_node_location_set(params, UP(param));
5707 pm_node_list_append(arena, &params->optionals, UP(param));
5708}
5709
5713static void
5714pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5715 pm_parameters_node_location_set(params, param);
5716 pm_node_list_append(arena, &params->posts, param);
5717}
5718
5722static void
5723pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5724 pm_parameters_node_location_set(params, param);
5725 params->rest = param;
5726}
5727
5731static void
5732pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5733 pm_parameters_node_location_set(params, param);
5734 pm_node_list_append(arena, &params->keywords, param);
5735}
5736
5740static void
5741pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5742 assert(params->keyword_rest == NULL);
5743 pm_parameters_node_location_set(params, param);
5744 params->keyword_rest = param;
5745}
5746
5750static void
5751pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
5752 assert(params->block == NULL);
5753 pm_parameters_node_location_set(params, param);
5754 params->block = param;
5755}
5756
5760static pm_program_node_t *
5761pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5762 return pm_program_node_new(
5763 parser->arena,
5764 ++parser->node_id,
5765 0,
5766 PM_LOCATION_INIT_NODE(statements),
5767 *locals,
5768 statements
5769 );
5770}
5771
5775static pm_parentheses_node_t *
5776pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5777 return pm_parentheses_node_new(
5778 parser->arena,
5779 ++parser->node_id,
5780 flags,
5781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5782 body,
5783 TOK2LOC(parser, opening),
5784 TOK2LOC(parser, closing)
5785 );
5786}
5787
5791static pm_pinned_expression_node_t *
5792pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5793 return pm_pinned_expression_node_new(
5794 parser->arena,
5795 ++parser->node_id,
5796 0,
5797 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
5798 expression,
5799 TOK2LOC(parser, operator),
5800 TOK2LOC(parser, lparen),
5801 TOK2LOC(parser, rparen)
5802 );
5803}
5804
5808static pm_pinned_variable_node_t *
5809pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5810 return pm_pinned_variable_node_new(
5811 parser->arena,
5812 ++parser->node_id,
5813 0,
5814 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
5815 variable,
5816 TOK2LOC(parser, operator)
5817 );
5818}
5819
5823static pm_post_execution_node_t *
5824pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5825 return pm_post_execution_node_new(
5826 parser->arena,
5827 ++parser->node_id,
5828 0,
5829 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5830 statements,
5831 TOK2LOC(parser, keyword),
5832 TOK2LOC(parser, opening),
5833 TOK2LOC(parser, closing)
5834 );
5835}
5836
5840static pm_pre_execution_node_t *
5841pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5842 return pm_pre_execution_node_new(
5843 parser->arena,
5844 ++parser->node_id,
5845 0,
5846 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5847 statements,
5848 TOK2LOC(parser, keyword),
5849 TOK2LOC(parser, opening),
5850 TOK2LOC(parser, closing)
5851 );
5852}
5853
5857static pm_range_node_t *
5858pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5859 pm_assert_value_expression(parser, left);
5860 pm_assert_value_expression(parser, right);
5861 pm_node_flags_t flags = 0;
5862
5863 // Indicate that this node is an exclusive range if the operator is `...`.
5864 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5865 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5866 }
5867
5868 // Indicate that this node is a static literal (i.e., can be compiled with
5869 // a putobject in CRuby) if the left and right are implicit nil, explicit
5870 // nil, or integers.
5871 if (
5872 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5873 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5874 ) {
5875 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5876 }
5877
5878 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
5879 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
5880
5881 return pm_range_node_new(
5882 parser->arena,
5883 ++parser->node_id,
5884 flags,
5885 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5886 left,
5887 right,
5888 TOK2LOC(parser, operator)
5889 );
5890}
5891
5895static pm_redo_node_t *
5896pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5897 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5898
5899 return pm_redo_node_new(
5900 parser->arena,
5901 ++parser->node_id,
5902 0,
5903 PM_LOCATION_INIT_TOKEN(parser, token)
5904 );
5905}
5906
5911static pm_regular_expression_node_t *
5912pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5913 return pm_regular_expression_node_new(
5914 parser->arena,
5915 ++parser->node_id,
5916 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
5917 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5918 TOK2LOC(parser, opening),
5919 TOK2LOC(parser, content),
5920 TOK2LOC(parser, closing),
5921 *unescaped
5922 );
5923}
5924
5928static inline pm_regular_expression_node_t *
5929pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5930 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5931}
5932
5936static pm_required_parameter_node_t *
5937pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5938 return pm_required_parameter_node_new(
5939 parser->arena,
5940 ++parser->node_id,
5941 0,
5942 PM_LOCATION_INIT_TOKEN(parser, token),
5943 pm_parser_constant_id_token(parser, token)
5944 );
5945}
5946
5950static pm_rescue_modifier_node_t *
5951pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5952 return pm_rescue_modifier_node_new(
5953 parser->arena,
5954 ++parser->node_id,
5955 0,
5956 PM_LOCATION_INIT_NODES(expression, rescue_expression),
5957 expression,
5958 TOK2LOC(parser, keyword),
5959 rescue_expression
5960 );
5961}
5962
5966static pm_rescue_node_t *
5967pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5968 return pm_rescue_node_new(
5969 parser->arena,
5970 ++parser->node_id,
5971 0,
5972 PM_LOCATION_INIT_TOKEN(parser, keyword),
5973 TOK2LOC(parser, keyword),
5974 ((pm_node_list_t) { 0 }),
5975 ((pm_location_t) { 0 }),
5976 NULL,
5977 ((pm_location_t) { 0 }),
5978 NULL,
5979 NULL
5980 );
5981}
5982
5983static inline void
5984pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
5985 node->operator_loc = TOK2LOC(parser, operator);
5986}
5987
5991static void
5992pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5993 node->reference = reference;
5994 PM_NODE_LENGTH_SET_NODE(node, reference);
5995}
5996
6000static void
6001pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6002 node->statements = statements;
6003 if (pm_statements_node_body_length(statements) > 0) {
6004 PM_NODE_LENGTH_SET_NODE(node, statements);
6005 }
6006}
6007
6011static void
6012pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6013 node->subsequent = subsequent;
6014 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6015}
6016
6020static void
6021pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6022 pm_node_list_append(arena, &node->exceptions, exception);
6023 PM_NODE_LENGTH_SET_NODE(node, exception);
6024}
6025
6029static pm_rest_parameter_node_t *
6030pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6031 return pm_rest_parameter_node_new(
6032 parser->arena,
6033 ++parser->node_id,
6034 0,
6035 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6036 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6037 NTOK2LOC(parser, name),
6038 TOK2LOC(parser, operator)
6039 );
6040}
6041
6045static pm_retry_node_t *
6046pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6047 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6048
6049 return pm_retry_node_new(
6050 parser->arena,
6051 ++parser->node_id,
6052 0,
6053 PM_LOCATION_INIT_TOKEN(parser, token)
6054 );
6055}
6056
6060static pm_return_node_t *
6061pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6062 return pm_return_node_new(
6063 parser->arena,
6064 ++parser->node_id,
6065 0,
6066 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6067 TOK2LOC(parser, keyword),
6068 arguments
6069 );
6070}
6071
6075static pm_self_node_t *
6076pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6077 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6078
6079 return pm_self_node_new(
6080 parser->arena,
6081 ++parser->node_id,
6082 0,
6083 PM_LOCATION_INIT_TOKEN(parser, token)
6084 );
6085}
6086
6090static pm_shareable_constant_node_t *
6091pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6092 return pm_shareable_constant_node_new(
6093 parser->arena,
6094 ++parser->node_id,
6095 (pm_node_flags_t) value,
6096 PM_LOCATION_INIT_NODE(write),
6097 write
6098 );
6099}
6100
6104static pm_singleton_class_node_t *
6105pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6106 return pm_singleton_class_node_new(
6107 parser->arena,
6108 ++parser->node_id,
6109 0,
6110 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6111 *locals,
6112 TOK2LOC(parser, class_keyword),
6113 TOK2LOC(parser, operator),
6114 expression,
6115 body,
6116 TOK2LOC(parser, end_keyword)
6117 );
6118}
6119
6123static pm_source_encoding_node_t *
6124pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6125 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6126
6127 return pm_source_encoding_node_new(
6128 parser->arena,
6129 ++parser->node_id,
6130 PM_NODE_FLAG_STATIC_LITERAL,
6131 PM_LOCATION_INIT_TOKEN(parser, token)
6132 );
6133}
6134
6138static pm_source_file_node_t*
6139pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6140 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6141
6142 pm_node_flags_t flags = 0;
6143
6144 switch (parser->frozen_string_literal) {
6145 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6146 flags |= PM_STRING_FLAGS_MUTABLE;
6147 break;
6148 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6149 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6150 break;
6151 }
6152
6153 return pm_source_file_node_new(
6154 parser->arena,
6155 ++parser->node_id,
6156 flags,
6157 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
6158 parser->filepath
6159 );
6160}
6161
6165static pm_source_line_node_t *
6166pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6167 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6168
6169 return pm_source_line_node_new(
6170 parser->arena,
6171 ++parser->node_id,
6172 PM_NODE_FLAG_STATIC_LITERAL,
6173 PM_LOCATION_INIT_TOKEN(parser, token)
6174 );
6175}
6176
6180static pm_splat_node_t *
6181pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6182 return pm_splat_node_new(
6183 parser->arena,
6184 ++parser->node_id,
6185 0,
6186 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6187 TOK2LOC(parser, operator),
6188 expression
6189 );
6190}
6191
6195static pm_statements_node_t *
6196pm_statements_node_create(pm_parser_t *parser) {
6197 return pm_statements_node_new(
6198 parser->arena,
6199 ++parser->node_id,
6200 0,
6201 PM_LOCATION_INIT_UNSET,
6202 ((pm_node_list_t) { 0 })
6203 );
6204}
6205
6209static size_t
6210pm_statements_node_body_length(pm_statements_node_t *node) {
6211 return node && node->body.size;
6212}
6213
6218static inline void
6219pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6220 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6221 PM_NODE_START_SET_NODE(node, statement);
6222 }
6223
6224 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6225 PM_NODE_LENGTH_SET_NODE(node, statement);
6226 }
6227}
6228
6232static void
6233pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6234 pm_statements_node_body_update(node, statement);
6235
6236 if (node->body.size > 0) {
6237 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6238
6239 switch (PM_NODE_TYPE(previous)) {
6240 case PM_BREAK_NODE:
6241 case PM_NEXT_NODE:
6242 case PM_REDO_NODE:
6243 case PM_RETRY_NODE:
6244 case PM_RETURN_NODE:
6245 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6246 break;
6247 default:
6248 break;
6249 }
6250 }
6251
6252 pm_node_list_append(parser->arena, &node->body, statement);
6253 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6254}
6255
6259static void
6260pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6261 pm_statements_node_body_update(node, statement);
6262 pm_node_list_prepend(arena, &node->body, statement);
6263 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6264}
6265
6269static inline pm_string_node_t *
6270pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6271 pm_node_flags_t flags = 0;
6272
6273 switch (parser->frozen_string_literal) {
6274 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6275 flags = PM_STRING_FLAGS_MUTABLE;
6276 break;
6277 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6278 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6279 break;
6280 }
6281
6282 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6283 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6284
6285 return pm_string_node_new(
6286 parser->arena,
6287 ++parser->node_id,
6288 flags,
6289 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6290 NTOK2LOC(parser, opening),
6291 TOK2LOC(parser, content),
6292 NTOK2LOC(parser, closing),
6293 *string
6294 );
6295}
6296
6300static pm_string_node_t *
6301pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6302 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6303}
6304
6309static pm_string_node_t *
6310pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6311 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6312 parser->current_string = PM_STRING_EMPTY;
6313 return node;
6314}
6315
6319static pm_super_node_t *
6320pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6321 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6322
6323 const pm_location_t *end = pm_arguments_end(arguments);
6324 assert(end != NULL && "unreachable");
6325
6326 return pm_super_node_new(
6327 parser->arena,
6328 ++parser->node_id,
6329 0,
6330 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6331 TOK2LOC(parser, keyword),
6332 arguments->opening_loc,
6333 arguments->arguments,
6334 arguments->closing_loc,
6335 arguments->block
6336 );
6337}
6338
6343static bool
6344pm_ascii_only_p(const pm_string_t *contents) {
6345 const size_t length = pm_string_length(contents);
6346 const uint8_t *source = pm_string_source(contents);
6347
6348 for (size_t index = 0; index < length; index++) {
6349 if (source[index] & 0x80) return false;
6350 }
6351
6352 return true;
6353}
6354
6358static void
6359parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6360 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6361 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6362
6363 if (width == 0) {
6364 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6365 break;
6366 }
6367
6368 cursor += width;
6369 }
6370}
6371
6376static void
6377parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6378 const pm_encoding_t *encoding = parser->encoding;
6379
6380 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6381 size_t width = encoding->char_width(cursor, end - cursor);
6382
6383 if (width == 0) {
6384 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6385 break;
6386 }
6387
6388 cursor += width;
6389 }
6390}
6391
6401static inline pm_node_flags_t
6402parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6403 if (parser->explicit_encoding != NULL) {
6404 // A Symbol may optionally have its encoding explicitly set. This will
6405 // happen if an escape sequence results in a non-ASCII code point.
6406 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6407 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6408 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6409 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6410 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6411 } else if (validate) {
6412 parse_symbol_encoding_validate_other(parser, location, contents);
6413 }
6414 } else if (pm_ascii_only_p(contents)) {
6415 // Ruby stipulates that all source files must use an ASCII-compatible
6416 // encoding. Thus, all symbols appearing in source are eligible for
6417 // "downgrading" to US-ASCII.
6418 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6419 } else if (validate) {
6420 parse_symbol_encoding_validate_other(parser, location, contents);
6421 }
6422
6423 return 0;
6424}
6425
6426static pm_node_flags_t
6427parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6428 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6429 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6430 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6431 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6432
6433 // There's special validation logic used if a string does not contain any character escape sequences.
6434 if (parser->explicit_encoding == NULL) {
6435 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6436 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6437 // the US-ASCII encoding.
6438 if (ascii_only) {
6439 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6440 }
6441
6442 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6443 if (!ascii_only) {
6444 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6445 }
6446 } else if (parser->encoding != modifier_encoding) {
6447 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6448
6449 if (modifier == 'n' && !ascii_only) {
6450 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6451 }
6452 }
6453
6454 return flags;
6455 }
6456
6457 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
6458 bool mixed_encoding = false;
6459
6460 if (mixed_encoding) {
6461 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6462 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6463 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
6464 bool valid_string_in_modifier_encoding = true;
6465
6466 if (!valid_string_in_modifier_encoding) {
6467 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6468 }
6469 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6470 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6471 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6472 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6473 }
6474 }
6475
6476 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
6477 return flags;
6478}
6479
6486static pm_node_flags_t
6487parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6488 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
6489 bool valid_unicode_range = true;
6490 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6491 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6492 return flags;
6493 }
6494
6495 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6496 // to multi-byte characters are allowed.
6497 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6498 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6499 // following error message appearing twice. We do the same for compatibility.
6500 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6501 }
6502
6511 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6512 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6513 }
6514
6515 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6516 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6517 }
6518
6519 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6520 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6521 }
6522
6523 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6524 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6525 }
6526
6527 // At this point no encoding modifiers will be present on the regular expression as they would have already
6528 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6529 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6530 if (ascii_only) {
6531 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6532 }
6533
6534 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6535 // or by specifying a modifier.
6536 //
6537 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6538 if (parser->explicit_encoding != NULL) {
6539 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6540 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6541 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6542 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
6543 }
6544 }
6545
6546 return 0;
6547}
6548
6553static pm_symbol_node_t *
6554pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6555 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6556 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6557
6558 return pm_symbol_node_new(
6559 parser->arena,
6560 ++parser->node_id,
6561 PM_NODE_FLAG_STATIC_LITERAL | flags,
6562 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6563 NTOK2LOC(parser, opening),
6564 NTOK2LOC(parser, value),
6565 NTOK2LOC(parser, closing),
6566 *unescaped
6567 );
6568}
6569
6573static inline pm_symbol_node_t *
6574pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6575 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6576}
6577
6581static pm_symbol_node_t *
6582pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6583 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6584 parser->current_string = PM_STRING_EMPTY;
6585 return node;
6586}
6587
6591static pm_symbol_node_t *
6592pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6593 assert(token->type == PM_TOKEN_LABEL);
6594
6595 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6596 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6597 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6598
6599 assert((label.end - label.start) >= 0);
6600 pm_string_shared_init(&node->unescaped, label.start, label.end);
6601 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6602
6603 return node;
6604}
6605
6609static pm_symbol_node_t *
6610pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6611 pm_symbol_node_t *node = pm_symbol_node_new(
6612 parser->arena,
6613 ++parser->node_id,
6614 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6615 PM_LOCATION_INIT_UNSET,
6616 ((pm_location_t) { 0 }),
6617 ((pm_location_t) { 0 }),
6618 ((pm_location_t) { 0 }),
6619 ((pm_string_t) { 0 })
6620 );
6621
6622 pm_string_constant_init(&node->unescaped, content, strlen(content));
6623 return node;
6624}
6625
6629static bool
6630pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6631 const pm_location_t *location = NULL;
6632
6633 switch (PM_NODE_TYPE(node)) {
6634 case PM_SYMBOL_NODE: {
6635 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6636 if (cast->closing_loc.length > 0) {
6637 location = &cast->closing_loc;
6638 }
6639 break;
6640 }
6641 case PM_INTERPOLATED_SYMBOL_NODE: {
6642 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6643 if (cast->closing_loc.length > 0) {
6644 location = &cast->closing_loc;
6645 }
6646 break;
6647 }
6648 default:
6649 return false;
6650 }
6651
6652 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6653}
6654
6658static pm_symbol_node_t *
6659pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6660 pm_symbol_node_t *new_node = pm_symbol_node_new(
6661 parser->arena,
6662 ++parser->node_id,
6663 PM_NODE_FLAG_STATIC_LITERAL,
6664 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6665 TOK2LOC(parser, opening),
6666 node->content_loc,
6667 TOK2LOC(parser, closing),
6668 node->unescaped
6669 );
6670
6671 pm_token_t content = {
6672 .type = PM_TOKEN_IDENTIFIER,
6673 .start = parser->start + node->content_loc.start,
6674 .end = parser->start + node->content_loc.start + node->content_loc.length
6675 };
6676
6677 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6678
6679 /* The old node is arena-allocated so no explicit free is needed. */
6680 return new_node;
6681}
6682
6686static pm_string_node_t *
6687pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6688 pm_node_flags_t flags = 0;
6689
6690 switch (parser->frozen_string_literal) {
6691 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6692 flags = PM_STRING_FLAGS_MUTABLE;
6693 break;
6694 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6695 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6696 break;
6697 }
6698
6699 pm_string_node_t *new_node = pm_string_node_new(
6700 parser->arena,
6701 ++parser->node_id,
6702 flags,
6703 PM_LOCATION_INIT_NODE(node),
6704 node->opening_loc,
6705 node->value_loc,
6706 node->closing_loc,
6707 node->unescaped
6708 );
6709
6710 /* The old node is arena-allocated so no explicit free is needed. */
6711 return new_node;
6712}
6713
6717static pm_true_node_t *
6718pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6719 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6720
6721 return pm_true_node_new(
6722 parser->arena,
6723 ++parser->node_id,
6724 PM_NODE_FLAG_STATIC_LITERAL,
6725 PM_LOCATION_INIT_TOKEN(parser, token)
6726 );
6727}
6728
6732static pm_true_node_t *
6733pm_true_node_synthesized_create(pm_parser_t *parser) {
6734 return pm_true_node_new(
6735 parser->arena,
6736 ++parser->node_id,
6737 PM_NODE_FLAG_STATIC_LITERAL,
6738 PM_LOCATION_INIT_UNSET
6739 );
6740}
6741
6745static pm_undef_node_t *
6746pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6747 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6748
6749 return pm_undef_node_new(
6750 parser->arena,
6751 ++parser->node_id,
6752 0,
6753 PM_LOCATION_INIT_TOKEN(parser, token),
6754 ((pm_node_list_t) { 0 }),
6755 TOK2LOC(parser, token)
6756 );
6757}
6758
6762static void
6763pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6764 PM_NODE_LENGTH_SET_NODE(node, name);
6765 pm_node_list_append(arena, &node->names, name);
6766}
6767
6771static pm_unless_node_t *
6772pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6773 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6774 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6775
6776 return pm_unless_node_new(
6777 parser->arena,
6778 ++parser->node_id,
6779 PM_NODE_FLAG_NEWLINE,
6780 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6781 TOK2LOC(parser, keyword),
6782 predicate,
6783 NTOK2LOC(parser, then_keyword),
6784 statements,
6785 NULL,
6786 ((pm_location_t) { 0 })
6787 );
6788}
6789
6793static pm_unless_node_t *
6794pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6795 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6796
6797 pm_statements_node_t *statements = pm_statements_node_create(parser);
6798 pm_statements_node_body_append(parser, statements, statement, true);
6799
6800 return pm_unless_node_new(
6801 parser->arena,
6802 ++parser->node_id,
6803 PM_NODE_FLAG_NEWLINE,
6804 PM_LOCATION_INIT_NODES(statement, predicate),
6805 TOK2LOC(parser, unless_keyword),
6806 predicate,
6807 ((pm_location_t) { 0 }),
6808 statements,
6809 NULL,
6810 ((pm_location_t) { 0 })
6811 );
6812}
6813
6814static inline void
6815pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6816 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6817 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6818}
6819
6825static void
6826pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6827 assert(parser->current_block_exits != NULL);
6828
6829 // All of the block exits that we want to remove should be within the
6830 // statements, and since we are modifying the statements, we shouldn't have
6831 // to check the end location.
6832 uint32_t start = statements->base.location.start;
6833
6834 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6835 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6836 if (block_exit->location.start < start) break;
6837
6838 // Implicitly remove from the list by lowering the size.
6839 parser->current_block_exits->size--;
6840 }
6841}
6842
6846static pm_until_node_t *
6847pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6848 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6849
6850 return pm_until_node_new(
6851 parser->arena,
6852 ++parser->node_id,
6853 flags,
6854 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6855 TOK2LOC(parser, keyword),
6856 NTOK2LOC(parser, do_keyword),
6857 TOK2LOC(parser, closing),
6858 predicate,
6859 statements
6860 );
6861}
6862
6866static pm_until_node_t *
6867pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6868 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6869 pm_loop_modifier_block_exits(parser, statements);
6870
6871 return pm_until_node_new(
6872 parser->arena,
6873 ++parser->node_id,
6874 flags,
6875 PM_LOCATION_INIT_NODES(statements, predicate),
6876 TOK2LOC(parser, keyword),
6877 ((pm_location_t) { 0 }),
6878 ((pm_location_t) { 0 }),
6879 predicate,
6880 statements
6881 );
6882}
6883
6887static pm_when_node_t *
6888pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6889 return pm_when_node_new(
6890 parser->arena,
6891 ++parser->node_id,
6892 0,
6893 PM_LOCATION_INIT_TOKEN(parser, keyword),
6894 TOK2LOC(parser, keyword),
6895 ((pm_node_list_t) { 0 }),
6896 ((pm_location_t) { 0 }),
6897 NULL
6898 );
6899}
6900
6904static void
6905pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
6906 PM_NODE_LENGTH_SET_NODE(node, condition);
6907 pm_node_list_append(arena, &node->conditions, condition);
6908}
6909
6913static inline void
6914pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
6915 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
6916 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
6917}
6918
6922static void
6923pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6924 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
6925 PM_NODE_LENGTH_SET_NODE(node, statements);
6926 }
6927
6928 node->statements = statements;
6929}
6930
6934static pm_while_node_t *
6935pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6936 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6937
6938 return pm_while_node_new(
6939 parser->arena,
6940 ++parser->node_id,
6941 flags,
6942 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6943 TOK2LOC(parser, keyword),
6944 NTOK2LOC(parser, do_keyword),
6945 TOK2LOC(parser, closing),
6946 predicate,
6947 statements
6948 );
6949}
6950
6954static pm_while_node_t *
6955pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6956 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6957 pm_loop_modifier_block_exits(parser, statements);
6958
6959 return pm_while_node_new(
6960 parser->arena,
6961 ++parser->node_id,
6962 flags,
6963 PM_LOCATION_INIT_NODES(statements, predicate),
6964 TOK2LOC(parser, keyword),
6965 ((pm_location_t) { 0 }),
6966 ((pm_location_t) { 0 }),
6967 predicate,
6968 statements
6969 );
6970}
6971
6975static pm_while_node_t *
6976pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
6977 return pm_while_node_new(
6978 parser->arena,
6979 ++parser->node_id,
6980 0,
6981 PM_LOCATION_INIT_UNSET,
6982 ((pm_location_t) { 0 }),
6983 ((pm_location_t) { 0 }),
6984 ((pm_location_t) { 0 }),
6985 predicate,
6986 statements
6987 );
6988}
6989
6994static pm_x_string_node_t *
6995pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6996 return pm_x_string_node_new(
6997 parser->arena,
6998 ++parser->node_id,
6999 PM_STRING_FLAGS_FROZEN,
7000 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
7001 TOK2LOC(parser, opening),
7002 TOK2LOC(parser, content),
7003 TOK2LOC(parser, closing),
7004 *unescaped
7005 );
7006}
7007
7011static inline pm_x_string_node_t *
7012pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7013 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7014}
7015
7019static pm_yield_node_t *
7020pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7021 uint32_t start = PM_TOKEN_START(parser, keyword);
7022 uint32_t end;
7023
7024 if (rparen_loc->length > 0) {
7025 end = PM_LOCATION_END(rparen_loc);
7026 } else if (arguments != NULL) {
7027 end = PM_NODE_END(arguments);
7028 } else if (lparen_loc->length > 0) {
7029 end = PM_LOCATION_END(lparen_loc);
7030 } else {
7031 end = PM_TOKEN_END(parser, keyword);
7032 }
7033
7034 return pm_yield_node_new(
7035 parser->arena,
7036 ++parser->node_id,
7037 0,
7038 ((pm_location_t) { .start = start, .length = U32(end - start) }),
7039 TOK2LOC(parser, keyword),
7040 *lparen_loc,
7041 arguments,
7042 *rparen_loc
7043 );
7044}
7045
7050static int
7051pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7052 pm_scope_t *scope = parser->current_scope;
7053 int depth = 0;
7054
7055 while (scope != NULL) {
7056 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7057 if (scope->closed) break;
7058
7059 scope = scope->previous;
7060 depth++;
7061 }
7062
7063 return -1;
7064}
7065
7071static inline int
7072pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7073 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7074}
7075
7079static inline void
7080pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7081 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
7082}
7083
7087static pm_constant_id_t
7088pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7089 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
7090 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7091 return constant_id;
7092}
7093
7097static inline pm_constant_id_t
7098pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7099 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
7100}
7101
7105static inline pm_constant_id_t
7106pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7107 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
7108}
7109
7113static pm_constant_id_t
7114pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7115 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7116 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7117 return constant_id;
7118}
7119
7123static pm_constant_id_t
7124pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7125 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7126 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7127 return constant_id;
7128}
7129
7137static bool
7138pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7139 // We want to check whether the parameter name is a numbered parameter or
7140 // not.
7141 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7142
7143 // Otherwise we'll fetch the constant id for the parameter name and check
7144 // whether it's already in the current scope.
7145 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7146
7147 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7148 // Add an error if the parameter doesn't start with _ and has been seen before
7149 if ((name->start < name->end) && (*name->start != '_')) {
7150 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7151 }
7152 return true;
7153 }
7154 return false;
7155}
7156
7160static void
7161pm_parser_scope_pop(pm_parser_t *parser) {
7162 pm_scope_t *scope = parser->current_scope;
7163 parser->current_scope = scope->previous;
7164 pm_locals_free(&scope->locals);
7165 xfree_sized(scope, sizeof(pm_scope_t));
7166}
7167
7168/******************************************************************************/
7169/* Stack helpers */
7170/******************************************************************************/
7171
7175static inline void
7176pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7177 *stack = (*stack << 1) | (value & 1);
7178}
7179
7183static inline void
7184pm_state_stack_pop(pm_state_stack_t *stack) {
7185 *stack >>= 1;
7186}
7187
7191static inline bool
7192pm_state_stack_p(const pm_state_stack_t *stack) {
7193 return *stack & 1;
7194}
7195
7196static inline void
7197pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7198 // Use the negation of the value to prevent stack overflow.
7199 pm_state_stack_push(&parser->accepts_block_stack, !value);
7200}
7201
7202static inline void
7203pm_accepts_block_stack_pop(pm_parser_t *parser) {
7204 pm_state_stack_pop(&parser->accepts_block_stack);
7205}
7206
7207static inline bool
7208pm_accepts_block_stack_p(pm_parser_t *parser) {
7209 return !pm_state_stack_p(&parser->accepts_block_stack);
7210}
7211
7212static inline void
7213pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7214 pm_state_stack_push(&parser->do_loop_stack, value);
7215}
7216
7217static inline void
7218pm_do_loop_stack_pop(pm_parser_t *parser) {
7219 pm_state_stack_pop(&parser->do_loop_stack);
7220}
7221
7222static inline bool
7223pm_do_loop_stack_p(pm_parser_t *parser) {
7224 return pm_state_stack_p(&parser->do_loop_stack);
7225}
7226
7227/******************************************************************************/
7228/* Lexer check helpers */
7229/******************************************************************************/
7230
7235static inline uint8_t
7236peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7237 if (cursor < parser->end) {
7238 return *cursor;
7239 } else {
7240 return '\0';
7241 }
7242}
7243
7249static inline uint8_t
7250peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7251 return peek_at(parser, parser->current.end + offset);
7252}
7253
7258static inline uint8_t
7259peek(const pm_parser_t *parser) {
7260 return peek_at(parser, parser->current.end);
7261}
7262
7267static inline bool
7268match(pm_parser_t *parser, uint8_t value) {
7269 if (peek(parser) == value) {
7270 parser->current.end++;
7271 return true;
7272 }
7273 return false;
7274}
7275
7280static inline size_t
7281match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7282 if (peek_at(parser, cursor) == '\n') {
7283 return 1;
7284 }
7285 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7286 return 2;
7287 }
7288 return 0;
7289}
7290
7296static inline size_t
7297match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7298 return match_eol_at(parser, parser->current.end + offset);
7299}
7300
7306static inline size_t
7307match_eol(pm_parser_t *parser) {
7308 return match_eol_at(parser, parser->current.end);
7309}
7310
/**
 * Find the first '\n' within the `length` bytes starting at `cursor`. Returns
 * a pointer to it, or NULL when there is none in that range.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is safe here: no supported encoding uses \n as one
    // of the bytes of a multi-byte character.
    const void *found = memchr(cursor, '\n', (size_t) length);
    return (const uint8_t *) found;
}
7323
7327static inline bool
7328ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7329 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7330}
7331
7336static bool
7337parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7338 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7339
7340 if (encoding != NULL) {
7341 if (parser->encoding != encoding) {
7342 parser->encoding = encoding;
7343 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7344 }
7345
7346 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7347 return true;
7348 }
7349
7350 return false;
7351}
7352
7357static void
7358parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7359 const uint8_t *cursor = parser->current.start + 1;
7360 const uint8_t *end = parser->current.end;
7361
7362 bool separator = false;
7363 while (true) {
7364 if (end - cursor <= 6) return;
7365 switch (cursor[6]) {
7366 case 'C': case 'c': cursor += 6; continue;
7367 case 'O': case 'o': cursor += 5; continue;
7368 case 'D': case 'd': cursor += 4; continue;
7369 case 'I': case 'i': cursor += 3; continue;
7370 case 'N': case 'n': cursor += 2; continue;
7371 case 'G': case 'g': cursor += 1; continue;
7372 case '=': case ':':
7373 separator = true;
7374 cursor += 6;
7375 break;
7376 default:
7377 cursor += 6;
7378 if (pm_char_is_whitespace(*cursor)) break;
7379 continue;
7380 }
7381 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7382 separator = false;
7383 }
7384
7385 while (true) {
7386 do {
7387 if (++cursor >= end) return;
7388 } while (pm_char_is_whitespace(*cursor));
7389
7390 if (separator) break;
7391 if (*cursor != '=' && *cursor != ':') return;
7392
7393 separator = true;
7394 cursor++;
7395 }
7396
7397 const uint8_t *value_start = cursor;
7398 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7399
7400 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7401 // If we were unable to parse the encoding value, then we've got an
7402 // issue because we didn't understand the encoding that the user was
7403 // trying to use. In this case we'll keep using the default encoding but
7404 // add an error to the parser to indicate an unsuccessful parse.
7405 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7406 }
7407}
7408
/**
 * The possible outcomes of reading the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither of the above. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
7414
7419static pm_magic_comment_boolean_value_t
7420parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7421 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7422 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7423 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7424 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7425 } else {
7426 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7427 }
7428}
7429
/**
 * Report whether the byte is one of the magic comment key delimiters: a
 * single quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7434
7440static inline const uint8_t *
7441parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7442 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7443 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7444 return cursor;
7445 }
7446 cursor++;
7447 }
7448 return NULL;
7449}
7450
7461static inline bool
7462parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7463 bool result = true;
7464
7465 const uint8_t *start = parser->current.start + 1;
7466 const uint8_t *end = parser->current.end;
7467 if (end - start <= 7) return false;
7468
7469 const uint8_t *cursor;
7470 bool indicator = false;
7471
7472 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7473 start = cursor + 3;
7474
7475 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7476 end = cursor;
7477 indicator = true;
7478 } else {
7479 // If we have a start marker but not an end marker, then we cannot
7480 // have a magic comment.
7481 return false;
7482 }
7483 }
7484
7485 cursor = start;
7486 while (cursor < end) {
7487 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7488
7489 const uint8_t *key_start = cursor;
7490 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7491
7492 const uint8_t *key_end = cursor;
7493 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7494 if (cursor == end) break;
7495
7496 if (*cursor == ':') {
7497 cursor++;
7498 } else {
7499 if (!indicator) return false;
7500 continue;
7501 }
7502
7503 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7504 if (cursor == end) break;
7505
7506 const uint8_t *value_start;
7507 const uint8_t *value_end;
7508
7509 if (*cursor == '"') {
7510 value_start = ++cursor;
7511 for (; cursor < end && *cursor != '"'; cursor++) {
7512 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7513 }
7514 value_end = cursor;
7515 if (cursor < end && *cursor == '"') cursor++;
7516 } else {
7517 value_start = cursor;
7518 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7519 value_end = cursor;
7520 }
7521
7522 if (indicator) {
7523 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7524 } else {
7525 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7526 if (cursor != end) return false;
7527 }
7528
7529 // Here, we need to do some processing on the key to swap out dashes for
7530 // underscores. We only need to do this if there _is_ a dash in the key.
7531 pm_string_t key;
7532 const size_t key_length = (size_t) (key_end - key_start);
7533 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7534
7535 if (dash == NULL) {
7536 pm_string_shared_init(&key, key_start, key_end);
7537 } else {
7538 uint8_t *buffer = xmalloc(key_length);
7539 if (buffer == NULL) break;
7540
7541 memcpy(buffer, key_start, key_length);
7542 buffer[dash - key_start] = '_';
7543
7544 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7545 buffer[dash - key_start] = '_';
7546 }
7547
7548 pm_string_owned_init(&key, buffer, key_length);
7549 }
7550
7551 // Finally, we can start checking the key against the list of known
7552 // magic comment keys, and potentially change state based on that.
7553 const uint8_t *key_source = pm_string_source(&key);
7554 uint32_t value_length = (uint32_t) (value_end - value_start);
7555
7556 // We only want to attempt to compare against encoding comments if it's
7557 // the first line in the file (or the second in the case of a shebang).
7558 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7559 if (
7560 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7561 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7562 ) {
7563 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7564 }
7565 }
7566
7567 if (key_length == 11) {
7568 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7569 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7570 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7571 PM_PARSER_WARN_TOKEN_FORMAT(
7572 parser,
7573 &parser->current,
7574 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7575 (int) key_length,
7576 (const char *) key_source,
7577 (int) value_length,
7578 (const char *) value_start
7579 );
7580 break;
7581 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7582 parser->warn_mismatched_indentation = false;
7583 break;
7584 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7585 parser->warn_mismatched_indentation = true;
7586 break;
7587 }
7588 }
7589 } else if (key_length == 21) {
7590 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7591 // We only want to handle frozen string literal comments if it's
7592 // before any semantic tokens have been seen.
7593 if (semantic_token_seen) {
7594 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7595 } else {
7596 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7597 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7598 PM_PARSER_WARN_TOKEN_FORMAT(
7599 parser,
7600 &parser->current,
7601 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7602 (int) key_length,
7603 (const char *) key_source,
7604 (int) value_length,
7605 (const char *) value_start
7606 );
7607 break;
7608 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7610 break;
7611 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7613 break;
7614 }
7615 }
7616 }
7617 } else if (key_length == 24) {
7618 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7619 const uint8_t *cursor = parser->current.start;
7620 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7621
7622 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7623 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7624 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7625 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7626 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7627 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7628 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7629 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7630 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7631 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7632 } else {
7633 PM_PARSER_WARN_TOKEN_FORMAT(
7634 parser,
7635 &parser->current,
7636 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7637 (int) key_length,
7638 (const char *) key_source,
7639 (int) value_length,
7640 (const char *) value_start
7641 );
7642 }
7643 }
7644 }
7645
7646 // When we're done, we want to free the string in case we had to
7647 // allocate memory for it.
7648 pm_string_free(&key);
7649
7650 // Allocate a new magic comment node to append to the parser's list.
7652 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7653 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7654 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7655 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7656 }
7657 }
7658
7659 return result;
7660}
7661
7662/******************************************************************************/
7663/* Context manipulations */
7664/******************************************************************************/
7665
7666static const uint32_t context_terminators[] = {
7667 [PM_CONTEXT_NONE] = 0,
7668 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7669 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7670 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7671 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7672 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7673 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7674 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7675 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7676 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7677 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7678 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7679 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7680 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7681 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7682 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7683 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7684 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7685 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7686 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7687 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7688 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7689 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7690 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7691 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7692 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7693 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7694 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7695 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7696 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7697 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7698 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7699 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7700 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7701 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7702 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7703 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7704 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7705 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7706 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7707 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7708 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7709 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7710 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7711 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7712 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7713 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7714 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7715 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7716 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7717 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7718 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7719 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7720 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7721 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7722};
7723
7724static inline bool
7725context_terminator(pm_context_t context, pm_token_t *token) {
7726 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7727}
7728
7733static pm_context_t
7734context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7735 pm_context_node_t *context_node = parser->current_context;
7736
7737 while (context_node != NULL) {
7738 if (context_terminator(context_node->context, token)) return context_node->context;
7739 context_node = context_node->prev;
7740 }
7741
7742 return PM_CONTEXT_NONE;
7743}
7744
7745static bool
7746context_push(pm_parser_t *parser, pm_context_t context) {
7747 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7748 if (context_node == NULL) return false;
7749
7750 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7751
7752 if (parser->current_context == NULL) {
7753 parser->current_context = context_node;
7754 } else {
7755 context_node->prev = parser->current_context;
7756 parser->current_context = context_node;
7757 }
7758
7759 return true;
7760}
7761
7762static void
7763context_pop(pm_parser_t *parser) {
7764 pm_context_node_t *prev = parser->current_context->prev;
7765 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7766 parser->current_context = prev;
7767}
7768
7769static bool
7770context_p(const pm_parser_t *parser, pm_context_t context) {
7771 pm_context_node_t *context_node = parser->current_context;
7772
7773 while (context_node != NULL) {
7774 if (context_node->context == context) return true;
7775 context_node = context_node->prev;
7776 }
7777
7778 return false;
7779}
7780
7781static bool
7782context_def_p(const pm_parser_t *parser) {
7783 pm_context_node_t *context_node = parser->current_context;
7784
7785 while (context_node != NULL) {
7786 switch (context_node->context) {
7787 case PM_CONTEXT_DEF:
7792 return true;
7793 case PM_CONTEXT_CLASS:
7797 case PM_CONTEXT_MODULE:
7801 case PM_CONTEXT_SCLASS:
7805 return false;
7806 default:
7807 context_node = context_node->prev;
7808 }
7809 }
7810
7811 return false;
7812}
7813
7818static const char *
7819context_human(pm_context_t context) {
7820 switch (context) {
7821 case PM_CONTEXT_NONE:
7822 assert(false && "unreachable");
7823 return "";
7824 case PM_CONTEXT_BEGIN: return "begin statement";
7825 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7826 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7827 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7828 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7829 case PM_CONTEXT_CASE_IN: return "'in' clause";
7830 case PM_CONTEXT_CLASS: return "class definition";
7831 case PM_CONTEXT_DEF: return "method definition";
7832 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7833 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7834 case PM_CONTEXT_DEFINED: return "'defined?' expression";
7835 case PM_CONTEXT_ELSE:
7842 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7843 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7844 case PM_CONTEXT_EMBEXPR: return "embedded expression";
7851 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7852 case PM_CONTEXT_FOR: return "for loop";
7853 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7854 case PM_CONTEXT_IF: return "if statement";
7855 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7856 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7857 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7858 case PM_CONTEXT_MAIN: return "top level context";
7859 case PM_CONTEXT_MODULE: return "module definition";
7860 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7861 case PM_CONTEXT_PARENS: return "parentheses";
7862 case PM_CONTEXT_POSTEXE: return "'END' block";
7863 case PM_CONTEXT_PREDICATE: return "predicate";
7864 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
7872 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7873 case PM_CONTEXT_SCLASS: return "singleton class definition";
7874 case PM_CONTEXT_TERNARY: return "ternary expression";
7875 case PM_CONTEXT_UNLESS: return "unless statement";
7876 case PM_CONTEXT_UNTIL: return "until statement";
7877 case PM_CONTEXT_WHILE: return "while statement";
7878 }
7879
7880 assert(false && "unreachable");
7881 return "";
7882}
7883
7884/******************************************************************************/
7885/* Specific token lexers */
7886/******************************************************************************/
7887
7888static inline void
7889pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7890 if (invalid != NULL) {
7891 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7892 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
7893 }
7894}
7895
7896static size_t
7897pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7898 const uint8_t *invalid = NULL;
7899 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7900 pm_strspn_number_validate(parser, string, length, invalid);
7901 return length;
7902}
7903
7904static size_t
7905pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7906 const uint8_t *invalid = NULL;
7907 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7908 pm_strspn_number_validate(parser, string, length, invalid);
7909 return length;
7910}
7911
7912static size_t
7913pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7914 const uint8_t *invalid = NULL;
7915 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
7916 pm_strspn_number_validate(parser, string, length, invalid);
7917 return length;
7918}
7919
7920static size_t
7921pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7922 const uint8_t *invalid = NULL;
7923 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
7924 pm_strspn_number_validate(parser, string, length, invalid);
7925 return length;
7926}
7927
7928static pm_token_type_t
7929lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
7930 pm_token_type_t type = PM_TOKEN_INTEGER;
7931
7932 // Here we're going to attempt to parse the optional decimal portion of a
7933 // float. If it's not there, then it's okay and we'll just continue on.
7934 if (peek(parser) == '.') {
7935 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7936 parser->current.end += 2;
7937 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7938 type = PM_TOKEN_FLOAT;
7939 } else {
7940 // If we had a . and then something else, then it's not a float
7941 // suffix on a number it's a method call or something else.
7942 return type;
7943 }
7944 }
7945
7946 // Here we're going to attempt to parse the optional exponent portion of a
7947 // float. If it's not there, it's okay and we'll just continue on.
7948 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
7949 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
7950 parser->current.end += 2;
7951
7952 if (pm_char_is_decimal_digit(peek(parser))) {
7953 parser->current.end++;
7954 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7955 } else {
7956 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7957 }
7958 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7959 parser->current.end++;
7960 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7961 } else {
7962 return type;
7963 }
7964
7965 *seen_e = true;
7966 type = PM_TOKEN_FLOAT;
7967 }
7968
7969 return type;
7970}
7971
7972static pm_token_type_t
7973lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
7974 pm_token_type_t type = PM_TOKEN_INTEGER;
7975 *seen_e = false;
7976
7977 if (peek_offset(parser, -1) == '0') {
7978 switch (*parser->current.end) {
7979 // 0d1111 is a decimal number
7980 case 'd':
7981 case 'D':
7982 parser->current.end++;
7983 if (pm_char_is_decimal_digit(peek(parser))) {
7984 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
7985 } else {
7986 match(parser, '_');
7987 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
7988 }
7989
7990 break;
7991
7992 // 0b1111 is a binary number
7993 case 'b':
7994 case 'B':
7995 parser->current.end++;
7996 if (pm_char_is_binary_digit(peek(parser))) {
7997 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
7998 } else {
7999 match(parser, '_');
8000 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8001 }
8002
8003 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8004 break;
8005
8006 // 0o1111 is an octal number
8007 case 'o':
8008 case 'O':
8009 parser->current.end++;
8010 if (pm_char_is_octal_digit(peek(parser))) {
8011 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8012 } else {
8013 match(parser, '_');
8014 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8015 }
8016
8017 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8018 break;
8019
8020 // 01111 is an octal number
8021 case '_':
8022 case '0':
8023 case '1':
8024 case '2':
8025 case '3':
8026 case '4':
8027 case '5':
8028 case '6':
8029 case '7':
8030 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8031 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8032 break;
8033
8034 // 0x1111 is a hexadecimal number
8035 case 'x':
8036 case 'X':
8037 parser->current.end++;
8038 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8039 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8040 } else {
8041 match(parser, '_');
8042 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8043 }
8044
8045 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8046 break;
8047
8048 // 0.xxx is a float
8049 case '.': {
8050 type = lex_optional_float_suffix(parser, seen_e);
8051 break;
8052 }
8053
8054 // 0exxx is a float
8055 case 'e':
8056 case 'E': {
8057 type = lex_optional_float_suffix(parser, seen_e);
8058 break;
8059 }
8060 }
8061 } else {
8062 // If it didn't start with a 0, then we'll lex as far as we can into a
8063 // decimal number.
8064 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8065
8066 // Afterward, we'll lex as far as we can into an optional float suffix.
8067 type = lex_optional_float_suffix(parser, seen_e);
8068 }
8069
8070 // At this point we have a completed number, but we want to provide the user
8071 // with a good experience if they put an additional .xxx fractional
8072 // component on the end, so we'll check for that here.
8073 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8074 const uint8_t *fraction_start = parser->current.end;
8075 const uint8_t *fraction_end = parser->current.end + 2;
8076 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8077 pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
8078 }
8079
8080 return type;
8081}
8082
8083static pm_token_type_t
8084lex_numeric(pm_parser_t *parser) {
8085 pm_token_type_t type = PM_TOKEN_INTEGER;
8086 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8087
8088 if (parser->current.end < parser->end) {
8089 bool seen_e = false;
8090 type = lex_numeric_prefix(parser, &seen_e);
8091
8092 const uint8_t *end = parser->current.end;
8093 pm_token_type_t suffix_type = type;
8094
8095 if (type == PM_TOKEN_INTEGER) {
8096 if (match(parser, 'r')) {
8097 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8098
8099 if (match(parser, 'i')) {
8100 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8101 }
8102 } else if (match(parser, 'i')) {
8103 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8104 }
8105 } else {
8106 if (!seen_e && match(parser, 'r')) {
8107 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8108
8109 if (match(parser, 'i')) {
8110 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8111 }
8112 } else if (match(parser, 'i')) {
8113 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8114 }
8115 }
8116
8117 const uint8_t b = peek(parser);
8118 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8119 parser->current.end = end;
8120 } else {
8121 type = suffix_type;
8122 }
8123 }
8124
8125 return type;
8126}
8127
8128static pm_token_type_t
8129lex_global_variable(pm_parser_t *parser) {
8130 if (parser->current.end >= parser->end) {
8131 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8132 return PM_TOKEN_GLOBAL_VARIABLE;
8133 }
8134
8135 // True if multiple characters are allowed after the declaration of the
8136 // global variable. Not true when it starts with "$-".
8137 bool allow_multiple = true;
8138
8139 switch (*parser->current.end) {
8140 case '~': // $~: match-data
8141 case '*': // $*: argv
8142 case '$': // $$: pid
8143 case '?': // $?: last status
8144 case '!': // $!: error string
8145 case '@': // $@: error position
8146 case '/': // $/: input record separator
8147 case '\\': // $\: output record separator
8148 case ';': // $;: field separator
8149 case ',': // $,: output field separator
8150 case '.': // $.: last read line number
8151 case '=': // $=: ignorecase
8152 case ':': // $:: load path
8153 case '<': // $<: reading filename
8154 case '>': // $>: default output handle
8155 case '\"': // $": already loaded files
8156 parser->current.end++;
8157 return PM_TOKEN_GLOBAL_VARIABLE;
8158
8159 case '&': // $&: last match
8160 case '`': // $`: string before last match
8161 case '\'': // $': string after last match
8162 case '+': // $+: string matches last paren.
8163 parser->current.end++;
8164 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8165
8166 case '0': {
8167 parser->current.end++;
8168 size_t width;
8169
8170 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8171 do {
8172 parser->current.end += width;
8173 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8174
8175 // $0 isn't allowed to be followed by anything.
8176 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8177 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8178 }
8179
8180 return PM_TOKEN_GLOBAL_VARIABLE;
8181 }
8182
8183 case '1':
8184 case '2':
8185 case '3':
8186 case '4':
8187 case '5':
8188 case '6':
8189 case '7':
8190 case '8':
8191 case '9':
8192 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8193 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8194
8195 case '-':
8196 parser->current.end++;
8197 allow_multiple = false;
8199 default: {
8200 size_t width;
8201
8202 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8203 do {
8204 parser->current.end += width;
8205 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8206 } else if (pm_char_is_whitespace(peek(parser))) {
8207 // If we get here, then we have a $ followed by whitespace,
8208 // which is not allowed.
8209 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8210 } else {
8211 // If we get here, then we have a $ followed by something that
8212 // isn't recognized as a global variable.
8213 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8214 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8215 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8216 }
8217
8218 return PM_TOKEN_GLOBAL_VARIABLE;
8219 }
8220 }
8221}
8222
8235static inline pm_token_type_t
8236lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8237 if (memcmp(current_start, value, vlen) == 0) {
8238 pm_lex_state_t last_state = parser->lex_state;
8239
8240 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8241 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8242 } else {
8243 lex_state_set(parser, state);
8244 if (state == PM_LEX_STATE_BEG) {
8245 parser->command_start = true;
8246 }
8247
8248 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8249 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8250 return modifier_type;
8251 }
8252 }
8253
8254 return type;
8255 }
8256
8257 return PM_TOKEN_EOF;
8258}
8259
8260static pm_token_type_t
8261lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8262 // Lex as far as we can into the current identifier.
8263 size_t width;
8264 const uint8_t *end = parser->end;
8265 const uint8_t *current_start = parser->current.start;
8266 const uint8_t *current_end = parser->current.end;
8267 bool encoding_changed = parser->encoding_changed;
8268
8269 if (encoding_changed) {
8270 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8271 current_end += width;
8272 }
8273 } else {
8274 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8275 current_end += width;
8276 }
8277 }
8278 parser->current.end = current_end;
8279
8280 // Now cache the length of the identifier so that we can quickly compare it
8281 // against known keywords.
8282 width = (size_t) (current_end - current_start);
8283
8284 if (current_end < end) {
8285 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8286 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8287 // check if we're returning the defined? keyword or just an identifier.
8288 width++;
8289
8290 if (
8291 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8292 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8293 ) {
8294 // If we're in a position where we can accept a : at the end of an
8295 // identifier, then we'll optionally accept it.
8296 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8297 (void) match(parser, ':');
8298 return PM_TOKEN_LABEL;
8299 }
8300
8301 if (parser->lex_state != PM_LEX_STATE_DOT) {
8302 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8303 return PM_TOKEN_KEYWORD_DEFINED;
8304 }
8305 }
8306
8307 return PM_TOKEN_METHOD_NAME;
8308 }
8309
8310 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8311 // If we're in a position where we can accept a = at the end of an
8312 // identifier, then we'll optionally accept it.
8313 return PM_TOKEN_IDENTIFIER;
8314 }
8315
8316 if (
8317 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8318 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8319 ) {
8320 // If we're in a position where we can accept a : at the end of an
8321 // identifier, then we'll optionally accept it.
8322 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8323 (void) match(parser, ':');
8324 return PM_TOKEN_LABEL;
8325 }
8326 }
8327
8328 if (parser->lex_state != PM_LEX_STATE_DOT) {
8329 pm_token_type_t type;
8330 switch (width) {
8331 case 2:
8332 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8333 if (pm_do_loop_stack_p(parser)) {
8334 return PM_TOKEN_KEYWORD_DO_LOOP;
8335 }
8336 return PM_TOKEN_KEYWORD_DO;
8337 }
8338
8339 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8340 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8341 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8342 break;
8343 case 3:
8344 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8345 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8346 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8347 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8348 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8349 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8350 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8351 break;
8352 case 4:
8353 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8354 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8355 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8356 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8357 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8358 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8359 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8360 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8361 break;
8362 case 5:
8363 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8364 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8365 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8366 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8367 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8368 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8369 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8370 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8371 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8372 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8373 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8374 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8375 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8376 break;
8377 case 6:
8378 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8379 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8380 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8381 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8382 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8383 break;
8384 case 8:
8385 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8386 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8387 break;
8388 case 12:
8389 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8390 break;
8391 }
8392 }
8393
8394 if (encoding_changed) {
8395 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8396 }
8397 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8398}
8399
8404static bool
8405current_token_starts_line(pm_parser_t *parser) {
8406 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8407}
8408
8423static pm_token_type_t
8424lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8425 // If there is no content following this #, then we're at the end of
8426 // the string and we can safely return string content.
8427 if (pound + 1 >= parser->end) {
8428 parser->current.end = pound + 1;
8429 return PM_TOKEN_STRING_CONTENT;
8430 }
8431
8432 // Now we'll check against the character that follows the #. If it
8433 // constitutes valid interplation, we'll handle that, otherwise we'll return
8434 // 0.
8435 switch (pound[1]) {
8436 case '@': {
8437 // In this case we may have hit an embedded instance or class variable.
8438 if (pound + 2 >= parser->end) {
8439 parser->current.end = pound + 1;
8440 return PM_TOKEN_STRING_CONTENT;
8441 }
8442
8443 // If we're looking at a @ and there's another @, then we'll skip past the
8444 // second @.
8445 const uint8_t *variable = pound + 2;
8446 if (*variable == '@' && pound + 3 < parser->end) variable++;
8447
8448 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8449 // At this point we're sure that we've either hit an embedded instance
8450 // or class variable. In this case we'll first need to check if we've
8451 // already consumed content.
8452 if (pound > parser->current.start) {
8453 parser->current.end = pound;
8454 return PM_TOKEN_STRING_CONTENT;
8455 }
8456
8457 // Otherwise we need to return the embedded variable token
8458 // and then switch to the embedded variable lex mode.
8459 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8460 parser->current.end = pound + 1;
8461 return PM_TOKEN_EMBVAR;
8462 }
8463
8464 // If we didn't get a valid interpolation, then this is just regular
8465 // string content. This is like if we get "#@-". In this case the caller
8466 // should keep lexing.
8467 parser->current.end = pound + 1;
8468 return 0;
8469 }
8470 case '$':
8471 // In this case we may have hit an embedded global variable. If there's
8472 // not enough room, then we'll just return string content.
8473 if (pound + 2 >= parser->end) {
8474 parser->current.end = pound + 1;
8475 return PM_TOKEN_STRING_CONTENT;
8476 }
8477
8478 // This is the character that we're going to check to see if it is the
8479 // start of an identifier that would indicate that this is a global
8480 // variable.
8481 const uint8_t *check = pound + 2;
8482
8483 if (pound[2] == '-') {
8484 if (pound + 3 >= parser->end) {
8485 parser->current.end = pound + 2;
8486 return PM_TOKEN_STRING_CONTENT;
8487 }
8488
8489 check++;
8490 }
8491
8492 // If the character that we're going to check is the start of an
8493 // identifier, or we don't have a - and the character is a decimal number
8494 // or a global name punctuation character, then we've hit an embedded
8495 // global variable.
8496 if (
8497 char_is_identifier_start(parser, check, parser->end - check) ||
8498 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8499 ) {
8500 // In this case we've hit an embedded global variable. First check to
8501 // see if we've already consumed content. If we have, then we need to
8502 // return that content as string content first.
8503 if (pound > parser->current.start) {
8504 parser->current.end = pound;
8505 return PM_TOKEN_STRING_CONTENT;
8506 }
8507
8508 // Otherwise, we need to return the embedded variable token and switch
8509 // to the embedded variable lex mode.
8510 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8511 parser->current.end = pound + 1;
8512 return PM_TOKEN_EMBVAR;
8513 }
8514
8515 // In this case we've hit a #$ that does not indicate a global variable.
8516 // In this case we'll continue lexing past it.
8517 parser->current.end = pound + 1;
8518 return 0;
8519 case '{':
8520 // In this case it's the start of an embedded expression. If we have
8521 // already consumed content, then we need to return that content as string
8522 // content first.
8523 if (pound > parser->current.start) {
8524 parser->current.end = pound;
8525 return PM_TOKEN_STRING_CONTENT;
8526 }
8527
8528 parser->enclosure_nesting++;
8529
8530 // Otherwise we'll skip past the #{ and begin lexing the embedded
8531 // expression.
8532 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8533 parser->current.end = pound + 2;
8534 parser->command_start = true;
8535 pm_do_loop_stack_push(parser, false);
8536 return PM_TOKEN_EMBEXPR_BEGIN;
8537 default:
8538 // In this case we've hit a # that doesn't constitute interpolation. We'll
8539 // mark that by returning the not provided token type. This tells the
8540 // consumer to keep lexing forward.
8541 parser->current.end = pound + 1;
8542 return 0;
8543 }
8544}
8545
// Bit flags describing the context in which an escape sequence is being read.
// They can be combined with |.

// No special handling.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

// The escape is nested inside a control escape (\c or \C-).
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

// The escape is nested inside a meta escape (\M-).
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

// The escape is part of a single character literal (?\x).
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

// The escape is inside a regular expression, so its source form is also
// written to the regular expression buffer.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
8551
/**
 * Lookup table indexed by an ASCII byte (0x00-0x7F). An entry is true when the
 * byte is accepted as the target of a control or meta escape: tab through
 * carriage return (0x09-0x0D) and 0x20-0x7E, with the exception of the
 * backslash (0x5C).
 */
static const bool ascii_printable_chars[] = {
    // 0x00-0x0F
    false, false, false, false, false, false, false, false, false, true,  true,  true,  true,  true,  false, false,
    // 0x10-0x1F
    false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
    // 0x20-0x2F
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
    // 0x30-0x3F
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
    // 0x40-0x4F
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
    // 0x50-0x5F (0x5C, the backslash, is excluded)
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  false, true,  true,  true,
    // 0x60-0x6F
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
    // 0x70-0x7F (0x7F, DEL, is excluded)
    true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  false
};
8565
8566static inline bool
8567char_is_ascii_printable(const uint8_t b) {
8568 return (b < 0x80) && ascii_printable_chars[b];
8569}
8570
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * byte is expected to already be a valid hexadecimal digit (0-9, a-f, A-F);
 * no validation happens here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits map directly onto 0-9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For letters, the low three bits of 'a'-'f' and 'A'-'F' are 1-6, so
    // adding 9 yields 10-15 regardless of case.
    return (uint8_t) ((value & 0x7) + 9);
}
8579
8585static inline uint32_t
8586escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
8587 uint32_t value = 0;
8588 for (size_t index = 0; index < length; index++) {
8589 if (index != 0) value <<= 4;
8590 value |= escape_hexadecimal_digit(string[index]);
8591 }
8592
8593 // Here we're going to verify that the value is actually a valid Unicode
8594 // codepoint and not a surrogate pair.
8595 if (value >= 0xD800 && value <= 0xDFFF) {
8596 if (error_location != NULL) {
8597 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8598 } else {
8599 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8600 }
8601 return 0xFFFD;
8602 }
8603
8604 return value;
8605}
8606
8610static inline uint8_t
8611escape_byte(uint8_t value, const uint8_t flags) {
8612 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8613 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8614 return value;
8615}
8616
8620static inline void
8621escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8622 // \u escape sequences in string-like structures implicitly change the
8623 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8624 // literal.
8625 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8626 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8627 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8628 }
8629
8631 }
8632
8633 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8634 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8635 pm_buffer_append_byte(buffer, 0xEF);
8636 pm_buffer_append_byte(buffer, 0xBF);
8637 pm_buffer_append_byte(buffer, 0xBD);
8638 }
8639}
8640
8645static inline void
8646escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8647 if (byte >= 0x80) {
8648 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8649 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8650 }
8651
8652 parser->explicit_encoding = parser->encoding;
8653 }
8654
8655 pm_buffer_append_byte(buffer, byte);
8656}
8657
8673static inline void
8674escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8675 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8676 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8677 }
8678
8679 escape_write_byte_encoded(parser, buffer, byte);
8680}
8681
8685static inline void
8686escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8687 size_t width;
8688 if (parser->encoding_changed) {
8689 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8690 } else {
8691 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8692 }
8693
8694 if (width == 1) {
8695 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8696 } else if (width > 1) {
8697 // Valid multibyte character. Just ignore escape.
8698 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8699 pm_buffer_append_bytes(b, parser->current.end, width);
8700 parser->current.end += width;
8701 } else {
8702 // Assume the next character wasn't meant to be part of this escape
8703 // sequence since it is invalid. Add an error and move on.
8704 parser->current.end++;
8705 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8706 }
8707}
8708
8714static void
8715escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8716#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8717
8718 PM_PARSER_WARN_TOKEN_FORMAT(
8719 parser,
8720 &parser->current,
8721 PM_WARN_INVALID_CHARACTER,
8722 FLAG(flags),
8723 FLAG(flag),
8724 type
8725 );
8726
8727#undef FLAG
8728}
8729
8733static void
8734escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8735 uint8_t peeked = peek(parser);
8736 switch (peeked) {
8737 case '\\': {
8738 parser->current.end++;
8739 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8740 return;
8741 }
8742 case '\'': {
8743 parser->current.end++;
8744 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8745 return;
8746 }
8747 case 'a': {
8748 parser->current.end++;
8749 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8750 return;
8751 }
8752 case 'b': {
8753 parser->current.end++;
8754 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8755 return;
8756 }
8757 case 'e': {
8758 parser->current.end++;
8759 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8760 return;
8761 }
8762 case 'f': {
8763 parser->current.end++;
8764 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8765 return;
8766 }
8767 case 'n': {
8768 parser->current.end++;
8769 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8770 return;
8771 }
8772 case 'r': {
8773 parser->current.end++;
8774 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8775 return;
8776 }
8777 case 's': {
8778 parser->current.end++;
8779 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8780 return;
8781 }
8782 case 't': {
8783 parser->current.end++;
8784 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8785 return;
8786 }
8787 case 'v': {
8788 parser->current.end++;
8789 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8790 return;
8791 }
8792 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8793 uint8_t value = (uint8_t) (*parser->current.end - '0');
8794 parser->current.end++;
8795
8796 if (pm_char_is_octal_digit(peek(parser))) {
8797 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8798 parser->current.end++;
8799
8800 if (pm_char_is_octal_digit(peek(parser))) {
8801 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8802 parser->current.end++;
8803 }
8804 }
8805
8806 value = escape_byte(value, flags);
8807 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8808 return;
8809 }
8810 case 'x': {
8811 const uint8_t *start = parser->current.end - 1;
8812
8813 parser->current.end++;
8814 uint8_t byte = peek(parser);
8815
8816 if (pm_char_is_hexadecimal_digit(byte)) {
8817 uint8_t value = escape_hexadecimal_digit(byte);
8818 parser->current.end++;
8819
8820 byte = peek(parser);
8821 if (pm_char_is_hexadecimal_digit(byte)) {
8822 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8823 parser->current.end++;
8824 }
8825
8826 value = escape_byte(value, flags);
8827 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8828 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8829 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8830 } else {
8831 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8832 }
8833 }
8834
8835 escape_write_byte_encoded(parser, buffer, value);
8836 } else {
8837 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8838 }
8839
8840 return;
8841 }
8842 case 'u': {
8843 const uint8_t *start = parser->current.end - 1;
8844 parser->current.end++;
8845
8846 if (parser->current.end == parser->end) {
8847 const uint8_t *start = parser->current.end - 2;
8848 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8849 } else if (peek(parser) == '{') {
8850 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8851 parser->current.end++;
8852
8853 size_t whitespace;
8854 while (true) {
8855 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8856 parser->current.end += whitespace;
8857 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8858 // This is super hacky, but it gets us nicer error
8859 // messages because we can still pass it off to the
8860 // regular expression engine even if we hit an
8861 // unterminated regular expression.
8862 parser->current.end += 2;
8863 } else {
8864 break;
8865 }
8866 }
8867
8868 const uint8_t *extra_codepoints_start = NULL;
8869 int codepoints_count = 0;
8870
8871 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8872 const uint8_t *unicode_start = parser->current.end;
8873 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8874
8875 if (hexadecimal_length > 6) {
8876 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8877 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8878 } else if (hexadecimal_length == 0) {
8879 // there are not hexadecimal characters
8880
8881 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8882 // If this is a regular expression, we are going to
8883 // let the regular expression engine handle this
8884 // error instead of us.
8885 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8886 } else {
8887 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
8888 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8889 }
8890
8891 return;
8892 }
8893
8894 parser->current.end += hexadecimal_length;
8895 codepoints_count++;
8896 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8897 extra_codepoints_start = unicode_start;
8898 }
8899
8900 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8901 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8902
8903 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8904 }
8905
8906 // ?\u{nnnn} character literal should contain only one codepoint
8907 // and cannot be like ?\u{nnnn mmmm}.
8908 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8909 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8910 }
8911
8912 if (parser->current.end == parser->end) {
8913 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
8914 } else if (peek(parser) == '}') {
8915 parser->current.end++;
8916 } else {
8917 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8918 // If this is a regular expression, we are going to let
8919 // the regular expression engine handle this error
8920 // instead of us.
8921 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8922 } else {
8923 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8924 }
8925 }
8926
8927 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8928 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
8929 }
8930 } else {
8931 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
8932
8933 if (length == 0) {
8934 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8935 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8936 } else {
8937 const uint8_t *start = parser->current.end - 2;
8938 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8939 }
8940 } else if (length == 4) {
8941 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
8942
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
8945 }
8946
8947 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
8948 parser->current.end += 4;
8949 } else {
8950 parser->current.end += length;
8951
8952 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8953 // If this is a regular expression, we are going to let
8954 // the regular expression engine handle this error
8955 // instead of us.
8956 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8957 } else {
8958 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8959 }
8960 }
8961 }
8962
8963 return;
8964 }
8965 case 'c': {
8966 parser->current.end++;
8967 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8968 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8969 }
8970
8971 if (parser->current.end == parser->end) {
8972 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8973 return;
8974 }
8975
8976 uint8_t peeked = peek(parser);
8977 switch (peeked) {
8978 case '?': {
8979 parser->current.end++;
8980 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
8981 return;
8982 }
8983 case '\\':
8984 parser->current.end++;
8985
8986 if (match(parser, 'u') || match(parser, 'U')) {
8987 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
8988 return;
8989 }
8990
8991 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
8992 return;
8993 case ' ':
8994 parser->current.end++;
8995 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
8996 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
8997 return;
8998 case '\t':
8999 parser->current.end++;
9000 escape_read_warn(parser, flags, 0, "\\t");
9001 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9002 return;
9003 default: {
9004 if (!char_is_ascii_printable(peeked)) {
9005 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9006 return;
9007 }
9008
9009 parser->current.end++;
9010 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9011 return;
9012 }
9013 }
9014 }
9015 case 'C': {
9016 parser->current.end++;
9017 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9018 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9019 }
9020
9021 if (peek(parser) != '-') {
9022 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9023 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9024 return;
9025 }
9026
9027 parser->current.end++;
9028 if (parser->current.end == parser->end) {
9029 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9030 return;
9031 }
9032
9033 uint8_t peeked = peek(parser);
9034 switch (peeked) {
9035 case '?': {
9036 parser->current.end++;
9037 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9038 return;
9039 }
9040 case '\\':
9041 parser->current.end++;
9042
9043 if (match(parser, 'u') || match(parser, 'U')) {
9044 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9045 return;
9046 }
9047
9048 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9049 return;
9050 case ' ':
9051 parser->current.end++;
9052 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9054 return;
9055 case '\t':
9056 parser->current.end++;
9057 escape_read_warn(parser, flags, 0, "\\t");
9058 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9059 return;
9060 default: {
9061 if (!char_is_ascii_printable(peeked)) {
9062 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9063 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9064 return;
9065 }
9066
9067 parser->current.end++;
9068 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9069 return;
9070 }
9071 }
9072 }
9073 case 'M': {
9074 parser->current.end++;
9075 if (flags & PM_ESCAPE_FLAG_META) {
9076 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9077 }
9078
9079 if (peek(parser) != '-') {
9080 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9081 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9082 return;
9083 }
9084
9085 parser->current.end++;
9086 if (parser->current.end == parser->end) {
9087 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9088 return;
9089 }
9090
9091 uint8_t peeked = peek(parser);
9092 switch (peeked) {
9093 case '\\':
9094 parser->current.end++;
9095
9096 if (match(parser, 'u') || match(parser, 'U')) {
9097 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9098 return;
9099 }
9100
9101 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9102 return;
9103 case ' ':
9104 parser->current.end++;
9105 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9106 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9107 return;
9108 case '\t':
9109 parser->current.end++;
9110 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9111 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9112 return;
9113 default:
9114 if (!char_is_ascii_printable(peeked)) {
9115 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9116 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9117 return;
9118 }
9119
9120 parser->current.end++;
9121 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9122 return;
9123 }
9124 }
9125 case '\r': {
9126 if (peek_offset(parser, 1) == '\n') {
9127 parser->current.end += 2;
9128 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9129 return;
9130 }
9132 }
9133 default: {
9134 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9135 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9136 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9137 return;
9138 }
9139 if (parser->current.end < parser->end) {
9140 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9141 } else {
9142 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9143 }
9144 return;
9145 }
9146 }
9147}
9148
9174static pm_token_type_t
9175lex_question_mark(pm_parser_t *parser) {
9176 if (lex_state_end_p(parser)) {
9177 lex_state_set(parser, PM_LEX_STATE_BEG);
9178 return PM_TOKEN_QUESTION_MARK;
9179 }
9180
9181 if (parser->current.end >= parser->end) {
9182 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9183 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9184 return PM_TOKEN_CHARACTER_LITERAL;
9185 }
9186
9187 if (pm_char_is_whitespace(*parser->current.end)) {
9188 lex_state_set(parser, PM_LEX_STATE_BEG);
9189 return PM_TOKEN_QUESTION_MARK;
9190 }
9191
9192 lex_state_set(parser, PM_LEX_STATE_BEG);
9193
9194 if (match(parser, '\\')) {
9195 lex_state_set(parser, PM_LEX_STATE_END);
9196
9197 pm_buffer_t buffer;
9198 pm_buffer_init_capacity(&buffer, 3);
9199
9200 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9201
9202 // Copy buffer data into the arena and free the heap buffer.
9203 void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
9204 pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
9205 pm_buffer_free(&buffer);
9206
9207 return PM_TOKEN_CHARACTER_LITERAL;
9208 } else {
9209 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9210
9211 // Ternary operators can have a ? immediately followed by an identifier
9212 // which starts with an underscore. We check for this case here.
9213 if (
9214 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9215 (
9216 (parser->current.end + encoding_width >= parser->end) ||
9217 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9218 )
9219 ) {
9220 lex_state_set(parser, PM_LEX_STATE_END);
9221 parser->current.end += encoding_width;
9222 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9223 return PM_TOKEN_CHARACTER_LITERAL;
9224 }
9225 }
9226
9227 return PM_TOKEN_QUESTION_MARK;
9228}
9229
9234static pm_token_type_t
9235lex_at_variable(pm_parser_t *parser) {
9236 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9237 const uint8_t *end = parser->end;
9238
9239 size_t width;
9240 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9241 parser->current.end += width;
9242
9243 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9244 parser->current.end += width;
9245 }
9246 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9247 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9248 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9249 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9250 }
9251
9252 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9253 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9254 } else {
9255 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9256 pm_parser_err_token(parser, &parser->current, diag_id);
9257 }
9258
9259 // If we're lexing an embedded variable, then we need to pop back into the
9260 // parent lex context.
9261 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9262 lex_mode_pop(parser);
9263 }
9264
9265 return type;
9266}
9267
9271static inline void
9272parser_lex_callback(pm_parser_t *parser) {
9273 if (parser->lex_callback) {
9274 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9275 }
9276}
9277
9281static inline pm_comment_t *
9282parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9283 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9284 if (comment == NULL) return NULL;
9285
9286 *comment = (pm_comment_t) {
9287 .type = type,
9288 .location = TOK2LOC(parser, &parser->current)
9289 };
9290
9291 return comment;
9292}
9293
9299static pm_token_type_t
9300lex_embdoc(pm_parser_t *parser) {
9301 // First, lex out the EMBDOC_BEGIN token.
9302 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9303
9304 if (newline == NULL) {
9305 parser->current.end = parser->end;
9306 } else {
9307 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9308 parser->current.end = newline + 1;
9309 }
9310
9311 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9312 parser_lex_callback(parser);
9313
9314 // Now, create a comment that is going to be attached to the parser.
9315 const uint8_t *comment_start = parser->current.start;
9316 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9317 if (comment == NULL) return PM_TOKEN_EOF;
9318
9319 // Now, loop until we find the end of the embedded documentation or the end
9320 // of the file.
9321 while (parser->current.end + 4 <= parser->end) {
9322 parser->current.start = parser->current.end;
9323
9324 // If we've hit the end of the embedded documentation then we'll return
9325 // that token here.
9326 if (
9327 (memcmp(parser->current.end, "=end", 4) == 0) &&
9328 (
9329 (parser->current.end + 4 == parser->end) || // end of file
9330 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9331 (parser->current.end[4] == '\0') || // NUL or end of script
9332 (parser->current.end[4] == '\004') || // ^D
9333 (parser->current.end[4] == '\032') // ^Z
9334 )
9335 ) {
9336 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9337
9338 if (newline == NULL) {
9339 parser->current.end = parser->end;
9340 } else {
9341 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9342 parser->current.end = newline + 1;
9343 }
9344
9345 parser->current.type = PM_TOKEN_EMBDOC_END;
9346 parser_lex_callback(parser);
9347
9348 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9349 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9350
9351 return PM_TOKEN_EMBDOC_END;
9352 }
9353
9354 // Otherwise, we'll parse until the end of the line and return a line of
9355 // embedded documentation.
9356 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9357
9358 if (newline == NULL) {
9359 parser->current.end = parser->end;
9360 } else {
9361 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9362 parser->current.end = newline + 1;
9363 }
9364
9365 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9366 parser_lex_callback(parser);
9367 }
9368
9369 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9370
9371 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9372 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9373
9374 return PM_TOKEN_EOF;
9375}
9376
9382static inline void
9383parser_lex_ignored_newline(pm_parser_t *parser) {
9384 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9385 parser_lex_callback(parser);
9386}
9387
9397static inline void
9398parser_flush_heredoc_end(pm_parser_t *parser) {
9399 assert(parser->heredoc_end <= parser->end);
9400 parser->next_start = parser->heredoc_end;
9401 parser->heredoc_end = NULL;
9402}
9403
9407static bool
9408parser_end_of_line_p(const pm_parser_t *parser) {
9409 const uint8_t *cursor = parser->current.end;
9410
9411 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9412 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9413 }
9414
9415 return true;
9416}
9417
9436typedef struct {
9442
9447 const uint8_t *cursor;
9449
9469
9473static inline void
9474pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9475 pm_buffer_append_byte(&token_buffer->buffer, byte);
9476}
9477
9478static inline void
9479pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9480 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9481}
9482
9486static inline size_t
9487parser_char_width(const pm_parser_t *parser) {
9488 size_t width;
9489 if (parser->encoding_changed) {
9490 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9491 } else {
9492 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9493 }
9494
9495 // TODO: If the character is invalid in the given encoding, then we'll just
9496 // push one byte into the buffer. This should actually be an error.
9497 return (width == 0 ? 1 : width);
9498}
9499
9503static void
9504pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9505 size_t width = parser_char_width(parser);
9506 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9507 parser->current.end += width;
9508}
9509
9510static void
9511pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9512 size_t width = parser_char_width(parser);
9513 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9514 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9515 parser->current.end += width;
9516}
9517
/**
 * Check whether every byte in the given slice is 7-bit ASCII, i.e. has its
 * high bit clear.
 *
 * @param value The first byte of the slice.
 * @param length The number of bytes in the slice.
 * @return true if no byte has the high bit set (including when the slice is
 *     empty), false otherwise.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        if (*value++ >= 0x80) return false;
    }

    return true;
}
9526
9533static inline void
9534pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9535 // Copy buffer data into the arena and free the heap buffer.
9536 size_t len = pm_buffer_length(&token_buffer->buffer);
9537 void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
9538 pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
9539 pm_buffer_free(&token_buffer->buffer);
9540}
9541
9542static inline void
9543pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9544 pm_token_buffer_copy(parser, &token_buffer->base);
9545 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9546 pm_buffer_free(&token_buffer->regexp_buffer);
9547}
9548
9558static void
9559pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9560 if (token_buffer->cursor == NULL) {
9561 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9562 } else {
9563 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9564 pm_token_buffer_copy(parser, token_buffer);
9565 }
9566}
9567
9568static void
9569pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9570 if (token_buffer->base.cursor == NULL) {
9571 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9572 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9573 } else {
9574 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9575 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9576 pm_regexp_token_buffer_copy(parser, token_buffer);
9577 }
9578}
9579
9580#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9581
9590static void
9591pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9592 const uint8_t *start;
9593 if (token_buffer->cursor == NULL) {
9594 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9595 start = parser->current.start;
9596 } else {
9597 start = token_buffer->cursor;
9598 }
9599
9600 const uint8_t *end = parser->current.end - 1;
9601 assert(end >= start);
9602 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9603
9604 token_buffer->cursor = end;
9605}
9606
9607static void
9608pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9609 const uint8_t *start;
9610 if (token_buffer->base.cursor == NULL) {
9611 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9612 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9613 start = parser->current.start;
9614 } else {
9615 start = token_buffer->base.cursor;
9616 }
9617
9618 const uint8_t *end = parser->current.end - 1;
9619 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9620 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9621
9622 token_buffer->base.cursor = end;
9623}
9624
9625#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9626
9631static inline size_t
9632pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9633 size_t whitespace = 0;
9634
9635 switch (indent) {
9636 case PM_HEREDOC_INDENT_NONE:
9637 // Do nothing, we can't match a terminator with
9638 // indentation and there's no need to calculate common
9639 // whitespace.
9640 break;
9641 case PM_HEREDOC_INDENT_DASH:
9642 // Skip past inline whitespace.
9643 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9644 break;
9645 case PM_HEREDOC_INDENT_TILDE:
9646 // Skip past inline whitespace and calculate common
9647 // whitespace.
9648 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9649 if (**cursor == '\t') {
9650 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9651 } else {
9652 whitespace++;
9653 }
9654 (*cursor)++;
9655 }
9656
9657 break;
9658 }
9659
9660 return whitespace;
9661}
9662
9667static uint8_t
9668pm_lex_percent_delimiter(pm_parser_t *parser) {
9669 size_t eol_length = match_eol(parser);
9670
9671 if (eol_length) {
9672 if (parser->heredoc_end) {
9673 // If we have already lexed a heredoc, then the newline has already
9674 // been added to the list. In this case we want to just flush the
9675 // heredoc end.
9676 parser_flush_heredoc_end(parser);
9677 } else {
9678 // Otherwise, we'll add the newline to the list of newlines.
9679 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9680 }
9681
9682 uint8_t delimiter = *parser->current.end;
9683
9684 // If our delimiter is \r\n, we want to treat it as if it's \n.
9685 // For example, %\r\nfoo\r\n should be "foo"
9686 if (eol_length == 2) {
9687 delimiter = *(parser->current.end + 1);
9688 }
9689
9690 parser->current.end += eol_length;
9691 return delimiter;
9692 }
9693
9694 return *parser->current.end++;
9695}
9696
9701#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9702
9709static void
9710parser_lex(pm_parser_t *parser) {
9711 assert(parser->current.end <= parser->end);
9712 parser->previous = parser->current;
9713
9714 // This value mirrors cmd_state from CRuby.
9715 bool previous_command_start = parser->command_start;
9716 parser->command_start = false;
9717
9718 // This is used to communicate to the newline lexing function that we've
9719 // already seen a comment.
9720 bool lexed_comment = false;
9721
9722 // Here we cache the current value of the semantic token seen flag. This is
9723 // used to reset it in case we find a token that shouldn't flip this flag.
9724 unsigned int semantic_token_seen = parser->semantic_token_seen;
9725 parser->semantic_token_seen = true;
9726
9727 // We'll jump to this label when we are about to encounter an EOF.
9728 // If we still have lex_modes on the stack, we pop them so that cleanup
9729 // can happen. For example, we should still continue parsing after a heredoc
9730 // identifier, even if the heredoc body was syntax invalid.
9731 switch_lex_modes:
9732
9733 switch (parser->lex_modes.current->mode) {
9734 case PM_LEX_DEFAULT:
9735 case PM_LEX_EMBEXPR:
9736 case PM_LEX_EMBVAR:
9737
9738 // We have a specific named label here because we are going to jump back to
9739 // this location in the event that we have lexed a token that should not be
9740 // returned to the parser. This includes comments, ignored newlines, and
9741 // invalid tokens of some form.
9742 lex_next_token: {
9743 // If we have the special next_start pointer set, then we're going to jump
9744 // to that location and start lexing from there.
9745 if (parser->next_start != NULL) {
9746 parser->current.end = parser->next_start;
9747 parser->next_start = NULL;
9748 }
9749
9750 // This value mirrors space_seen from CRuby. It tracks whether or not
9751 // space has been eaten before the start of the next token.
9752 bool space_seen = false;
9753
9754 // First, we're going to skip past any whitespace at the front of the next
9755 // token.
9756 bool chomping = true;
9757 while (parser->current.end < parser->end && chomping) {
9758 switch (*parser->current.end) {
9759 case ' ':
9760 case '\t':
9761 case '\f':
9762 case '\v':
9763 parser->current.end++;
9764 space_seen = true;
9765 break;
9766 case '\r':
9767 if (match_eol_offset(parser, 1)) {
9768 chomping = false;
9769 } else {
9770 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9771 parser->current.end++;
9772 space_seen = true;
9773 }
9774 break;
9775 case '\\': {
9776 size_t eol_length = match_eol_offset(parser, 1);
9777 if (eol_length) {
9778 if (parser->heredoc_end) {
9779 parser->current.end = parser->heredoc_end;
9780 parser->heredoc_end = NULL;
9781 } else {
9782 parser->current.end += eol_length + 1;
9783 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9784 space_seen = true;
9785 }
9786 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9787 parser->current.end += 2;
9788 } else {
9789 chomping = false;
9790 }
9791
9792 break;
9793 }
9794 default:
9795 chomping = false;
9796 break;
9797 }
9798 }
9799
9800 // Next, we'll set to start of this token to be the current end.
9801 parser->current.start = parser->current.end;
9802
9803 // We'll check if we're at the end of the file. If we are, then we
9804 // need to return the EOF token.
9805 if (parser->current.end >= parser->end) {
9806 // We may be missing closing tokens. We should pop modes one by one
9807 // to do the appropriate cleanup like moving next_start for heredocs.
9808 // Only when no mode is remaining will we actually emit the EOF token.
9809 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
9810 lex_mode_pop(parser);
9811 goto switch_lex_modes;
9812 }
9813
9814 // If we hit EOF, but the EOF came immediately after a newline,
9815 // set the start of the token to the newline. This way any EOF
9816 // errors will be reported as happening on that line rather than
9817 // a line after. For example "foo(\n" should report an error
9818 // on line 1 even though EOF technically occurs on line 2.
9819 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9820 parser->current.start -= 1;
9821 }
9822 LEX(PM_TOKEN_EOF);
9823 }
9824
9825 // Finally, we'll check the current character to determine the next
9826 // token.
9827 switch (*parser->current.end++) {
9828 case '\0': // NUL or end of script
9829 case '\004': // ^D
9830 case '\032': // ^Z
9831 parser->current.end--;
9832 LEX(PM_TOKEN_EOF);
9833
9834 case '#': { // comments
9835 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9836 parser->current.end = ending == NULL ? parser->end : ending;
9837
9838 // If we found a comment while lexing, then we're going to
9839 // add it to the list of comments in the file and keep
9840 // lexing.
9841 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9842 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9843
9844 if (ending) parser->current.end++;
9845 parser->current.type = PM_TOKEN_COMMENT;
9846 parser_lex_callback(parser);
9847
9848 // Here, parse the comment to see if it's a magic comment
9849 // and potentially change state on the parser.
9850 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9851 ptrdiff_t length = parser->current.end - parser->current.start;
9852
9853 // If we didn't find a magic comment within the first
9854 // pass and we're at the start of the file, then we need
9855 // to do another pass to potentially find other patterns
9856 // for encoding comments.
9857 if (length >= 10 && !parser->encoding_locked) {
9858 parser_lex_magic_comment_encoding(parser);
9859 }
9860 }
9861
9862 lexed_comment = true;
9863 }
9865 case '\r':
9866 case '\n': {
9867 parser->semantic_token_seen = semantic_token_seen & 0x1;
9868 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9869
9870 if (eol_length) {
9871 // The only way you can have carriage returns in this
9872 // particular loop is if you have a carriage return
9873 // followed by a newline. In that case we'll just skip
9874 // over the carriage return and continue lexing, in
9875 // order to make it so that the newline token
9876 // encapsulates both the carriage return and the
9877 // newline. Note that we need to check that we haven't
9878 // already lexed a comment here because that falls
9879 // through into here as well.
9880 if (!lexed_comment) {
9881 parser->current.end += eol_length - 1; // skip CR
9882 }
9883
9884 if (parser->heredoc_end == NULL) {
9885 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9886 }
9887 }
9888
9889 if (parser->heredoc_end) {
9890 parser_flush_heredoc_end(parser);
9891 }
9892
9893 // If this is an ignored newline, then we can continue lexing after
9894 // calling the callback with the ignored newline token.
9895 switch (lex_state_ignored_p(parser)) {
9896 case PM_IGNORED_NEWLINE_NONE:
9897 break;
9898 case PM_IGNORED_NEWLINE_PATTERN:
9899 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9900 if (!lexed_comment) parser_lex_ignored_newline(parser);
9901 lex_state_set(parser, PM_LEX_STATE_BEG);
9902 parser->command_start = true;
9903 parser->current.type = PM_TOKEN_NEWLINE;
9904 return;
9905 }
9907 case PM_IGNORED_NEWLINE_ALL:
9908 if (!lexed_comment) parser_lex_ignored_newline(parser);
9909 lexed_comment = false;
9910 goto lex_next_token;
9911 }
9912
9913 // Here we need to look ahead and see if there is a call operator
9914 // (either . or &.) that starts the next line. If there is, then this
9915 // is going to become an ignored newline and we're going to instead
9916 // return the call operator.
9917 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9918 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9919
9920 if (next_content < parser->end) {
9921 // If we hit a comment after a newline, then we're going to check
9922 // if it's ignored or if it's followed by a method call ('.').
9923 // If it is, then we're going to call the
9924 // callback with an ignored newline and then continue lexing.
9925 // Otherwise we'll return a regular newline.
9926 if (next_content[0] == '#') {
9927 // Here we look for a "." or "&." following a "\n".
9928 const uint8_t *following = next_newline(next_content, parser->end - next_content);
9929
9930 while (following && (following + 1 < parser->end)) {
9931 following++;
9932 following += pm_strspn_inline_whitespace(following, parser->end - following);
9933
9934 // If this is not followed by a comment, then we can break out
9935 // of this loop.
9936 if (peek_at(parser, following) != '#') break;
9937
9938 // If there is a comment, then we need to find the end of the
9939 // comment and continue searching from there.
9940 following = next_newline(following, parser->end - following);
9941 }
9942
9943 // If the lex state was ignored, we will lex the
9944 // ignored newline.
9945 if (lex_state_ignored_p(parser)) {
9946 if (!lexed_comment) parser_lex_ignored_newline(parser);
9947 lexed_comment = false;
9948 goto lex_next_token;
9949 }
9950
9951 // If we hit a '.' or a '&.' we will lex the ignored
9952 // newline.
9953 if (following && (
9954 (peek_at(parser, following) == '.') ||
9955 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
9956 )) {
9957 if (!lexed_comment) parser_lex_ignored_newline(parser);
9958 lexed_comment = false;
9959 goto lex_next_token;
9960 }
9961
9962
9963 // If we are parsing as CRuby 4.0 or later and we
9964 // hit a '&&' or a '||' then we will lex the ignored
9965 // newline.
9966 if (
9968 following && (
9969 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
9970 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
9971 (
9972 peek_at(parser, following) == 'a' &&
9973 peek_at(parser, following + 1) == 'n' &&
9974 peek_at(parser, following + 2) == 'd' &&
9975 peek_at(parser, next_content + 3) != '!' &&
9976 peek_at(parser, next_content + 3) != '?' &&
9977 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
9978 ) ||
9979 (
9980 peek_at(parser, following) == 'o' &&
9981 peek_at(parser, following + 1) == 'r' &&
9982 peek_at(parser, next_content + 2) != '!' &&
9983 peek_at(parser, next_content + 2) != '?' &&
9984 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
9985 )
9986 )
9987 ) {
9988 if (!lexed_comment) parser_lex_ignored_newline(parser);
9989 lexed_comment = false;
9990 goto lex_next_token;
9991 }
9992 }
9993
9994 // If we hit a . after a newline, then we're in a call chain and
9995 // we need to return the call operator.
9996 if (next_content[0] == '.') {
9997 // To match ripper, we need to emit an ignored newline even though
9998 // it's a real newline in the case that we have a beginless range
9999 // on a subsequent line.
10000 if (peek_at(parser, next_content + 1) == '.') {
10001 if (!lexed_comment) parser_lex_ignored_newline(parser);
10002 lex_state_set(parser, PM_LEX_STATE_BEG);
10003 parser->command_start = true;
10004 parser->current.type = PM_TOKEN_NEWLINE;
10005 return;
10006 }
10007
10008 if (!lexed_comment) parser_lex_ignored_newline(parser);
10009 lex_state_set(parser, PM_LEX_STATE_DOT);
10010 parser->current.start = next_content;
10011 parser->current.end = next_content + 1;
10012 parser->next_start = NULL;
10013 LEX(PM_TOKEN_DOT);
10014 }
10015
10016 // If we hit a &. after a newline, then we're in a call chain and
10017 // we need to return the call operator.
10018 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10019 if (!lexed_comment) parser_lex_ignored_newline(parser);
10020 lex_state_set(parser, PM_LEX_STATE_DOT);
10021 parser->current.start = next_content;
10022 parser->current.end = next_content + 2;
10023 parser->next_start = NULL;
10024 LEX(PM_TOKEN_AMPERSAND_DOT);
10025 }
10026
10027 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10028 // If we hit an && then we are in a logical chain
10029 // and we need to return the logical operator.
10030 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10031 if (!lexed_comment) parser_lex_ignored_newline(parser);
10032 lex_state_set(parser, PM_LEX_STATE_BEG);
10033 parser->current.start = next_content;
10034 parser->current.end = next_content + 2;
10035 parser->next_start = NULL;
10036 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10037 }
10038
10039 // If we hit a || then we are in a logical chain and
10040 // we need to return the logical operator.
10041 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10042 if (!lexed_comment) parser_lex_ignored_newline(parser);
10043 lex_state_set(parser, PM_LEX_STATE_BEG);
10044 parser->current.start = next_content;
10045 parser->current.end = next_content + 2;
10046 parser->next_start = NULL;
10047 LEX(PM_TOKEN_PIPE_PIPE);
10048 }
10049
10050 // If we hit an 'and' then we are in a logical chain
10051 // and we need to return the logical operator.
10052 if (
10053 peek_at(parser, next_content) == 'a' &&
10054 peek_at(parser, next_content + 1) == 'n' &&
10055 peek_at(parser, next_content + 2) == 'd' &&
10056 peek_at(parser, next_content + 3) != '!' &&
10057 peek_at(parser, next_content + 3) != '?' &&
10058 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10059 ) {
10060 if (!lexed_comment) parser_lex_ignored_newline(parser);
10061 lex_state_set(parser, PM_LEX_STATE_BEG);
10062 parser->current.start = next_content;
10063 parser->current.end = next_content + 3;
10064 parser->next_start = NULL;
10065 parser->command_start = true;
10066 LEX(PM_TOKEN_KEYWORD_AND);
10067 }
10068
10069 // If we hit a 'or' then we are in a logical chain
10070 // and we need to return the logical operator.
10071 if (
10072 peek_at(parser, next_content) == 'o' &&
10073 peek_at(parser, next_content + 1) == 'r' &&
10074 peek_at(parser, next_content + 2) != '!' &&
10075 peek_at(parser, next_content + 2) != '?' &&
10076 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10077 ) {
10078 if (!lexed_comment) parser_lex_ignored_newline(parser);
10079 lex_state_set(parser, PM_LEX_STATE_BEG);
10080 parser->current.start = next_content;
10081 parser->current.end = next_content + 2;
10082 parser->next_start = NULL;
10083 parser->command_start = true;
10084 LEX(PM_TOKEN_KEYWORD_OR);
10085 }
10086 }
10087 }
10088
10089 // At this point we know this is a regular newline, and we can set the
10090 // necessary state and return the token.
10091 lex_state_set(parser, PM_LEX_STATE_BEG);
10092 parser->command_start = true;
10093 parser->current.type = PM_TOKEN_NEWLINE;
10094 if (!lexed_comment) parser_lex_callback(parser);
10095 return;
10096 }
10097
10098 // ,
10099 case ',':
10100 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10101 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10102 }
10103
10104 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10105 LEX(PM_TOKEN_COMMA);
10106
10107 // (
10108 case '(': {
10109 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10110
10111 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10112 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10113 }
10114
10115 parser->enclosure_nesting++;
10116 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10117 pm_do_loop_stack_push(parser, false);
10118 LEX(type);
10119 }
10120
10121 // )
10122 case ')':
10123 parser->enclosure_nesting--;
10124 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10125 pm_do_loop_stack_pop(parser);
10126 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10127
10128 // ;
10129 case ';':
10130 lex_state_set(parser, PM_LEX_STATE_BEG);
10131 parser->command_start = true;
10132 LEX(PM_TOKEN_SEMICOLON);
10133
10134 // [ [] []=
10135 case '[':
10136 parser->enclosure_nesting++;
10137 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10138
10139 if (lex_state_operator_p(parser)) {
10140 if (match(parser, ']')) {
10141 parser->enclosure_nesting--;
10142 lex_state_set(parser, PM_LEX_STATE_ARG);
10143 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10144 }
10145
10146 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10147 LEX(type);
10148 }
10149
10150 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10151 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10152 }
10153
10154 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10155 pm_do_loop_stack_push(parser, false);
10156 LEX(type);
10157
10158 // ]
10159 case ']':
10160 parser->enclosure_nesting--;
10161 lex_state_set(parser, PM_LEX_STATE_END);
10162 pm_do_loop_stack_pop(parser);
10163 LEX(PM_TOKEN_BRACKET_RIGHT);
10164
10165 // {
10166 case '{': {
10167 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10168
10169 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10170 // This { begins a lambda
10171 parser->command_start = true;
10172 lex_state_set(parser, PM_LEX_STATE_BEG);
10173 type = PM_TOKEN_LAMBDA_BEGIN;
10174 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10175 // This { begins a hash literal
10176 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10177 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10178 // This { begins a block
10179 parser->command_start = true;
10180 lex_state_set(parser, PM_LEX_STATE_BEG);
10181 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10182 // This { begins a block on a command
10183 parser->command_start = true;
10184 lex_state_set(parser, PM_LEX_STATE_BEG);
10185 } else {
10186 // This { begins a hash literal
10187 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10188 }
10189
10190 parser->enclosure_nesting++;
10191 parser->brace_nesting++;
10192 pm_do_loop_stack_push(parser, false);
10193
10194 LEX(type);
10195 }
10196
10197 // }
10198 case '}':
10199 parser->enclosure_nesting--;
10200 pm_do_loop_stack_pop(parser);
10201
10202 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10203 lex_mode_pop(parser);
10204 LEX(PM_TOKEN_EMBEXPR_END);
10205 }
10206
10207 parser->brace_nesting--;
10208 lex_state_set(parser, PM_LEX_STATE_END);
10209 LEX(PM_TOKEN_BRACE_RIGHT);
10210
10211 // * ** **= *=
10212 case '*': {
10213 if (match(parser, '*')) {
10214 if (match(parser, '=')) {
10215 lex_state_set(parser, PM_LEX_STATE_BEG);
10216 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10217 }
10218
10219 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10220
10221 if (lex_state_spcarg_p(parser, space_seen)) {
10222 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10223 type = PM_TOKEN_USTAR_STAR;
10224 } else if (lex_state_beg_p(parser)) {
10225 type = PM_TOKEN_USTAR_STAR;
10226 } else if (ambiguous_operator_p(parser, space_seen)) {
10227 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10228 }
10229
10230 if (lex_state_operator_p(parser)) {
10231 lex_state_set(parser, PM_LEX_STATE_ARG);
10232 } else {
10233 lex_state_set(parser, PM_LEX_STATE_BEG);
10234 }
10235
10236 LEX(type);
10237 }
10238
10239 if (match(parser, '=')) {
10240 lex_state_set(parser, PM_LEX_STATE_BEG);
10241 LEX(PM_TOKEN_STAR_EQUAL);
10242 }
10243
10244 pm_token_type_t type = PM_TOKEN_STAR;
10245
10246 if (lex_state_spcarg_p(parser, space_seen)) {
10247 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10248 type = PM_TOKEN_USTAR;
10249 } else if (lex_state_beg_p(parser)) {
10250 type = PM_TOKEN_USTAR;
10251 } else if (ambiguous_operator_p(parser, space_seen)) {
10252 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10253 }
10254
10255 if (lex_state_operator_p(parser)) {
10256 lex_state_set(parser, PM_LEX_STATE_ARG);
10257 } else {
10258 lex_state_set(parser, PM_LEX_STATE_BEG);
10259 }
10260
10261 LEX(type);
10262 }
10263
10264 // ! != !~ !@
10265 case '!':
10266 if (lex_state_operator_p(parser)) {
10267 lex_state_set(parser, PM_LEX_STATE_ARG);
10268 if (match(parser, '@')) {
10269 LEX(PM_TOKEN_BANG);
10270 }
10271 } else {
10272 lex_state_set(parser, PM_LEX_STATE_BEG);
10273 }
10274
10275 if (match(parser, '=')) {
10276 LEX(PM_TOKEN_BANG_EQUAL);
10277 }
10278
10279 if (match(parser, '~')) {
10280 LEX(PM_TOKEN_BANG_TILDE);
10281 }
10282
10283 LEX(PM_TOKEN_BANG);
10284
10285 // = => =~ == === =begin
10286 case '=':
10287 if (
10288 current_token_starts_line(parser) &&
10289 (parser->current.end + 5 <= parser->end) &&
10290 memcmp(parser->current.end, "begin", 5) == 0 &&
10291 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10292 ) {
10293 pm_token_type_t type = lex_embdoc(parser);
10294 if (type == PM_TOKEN_EOF) {
10295 LEX(type);
10296 }
10297
10298 goto lex_next_token;
10299 }
10300
10301 if (lex_state_operator_p(parser)) {
10302 lex_state_set(parser, PM_LEX_STATE_ARG);
10303 } else {
10304 lex_state_set(parser, PM_LEX_STATE_BEG);
10305 }
10306
10307 if (match(parser, '>')) {
10308 LEX(PM_TOKEN_EQUAL_GREATER);
10309 }
10310
10311 if (match(parser, '~')) {
10312 LEX(PM_TOKEN_EQUAL_TILDE);
10313 }
10314
10315 if (match(parser, '=')) {
10316 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10317 }
10318
10319 LEX(PM_TOKEN_EQUAL);
10320
10321 // < << <<= <= <=>
10322 case '<':
10323 if (match(parser, '<')) {
10324 if (
10325 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10326 !lex_state_end_p(parser) &&
10327 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10328 ) {
10329 const uint8_t *end = parser->current.end;
10330
10331 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10332 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10333
10334 if (match(parser, '-')) {
10335 indent = PM_HEREDOC_INDENT_DASH;
10336 }
10337 else if (match(parser, '~')) {
10338 indent = PM_HEREDOC_INDENT_TILDE;
10339 }
10340
10341 if (match(parser, '`')) {
10342 quote = PM_HEREDOC_QUOTE_BACKTICK;
10343 }
10344 else if (match(parser, '"')) {
10345 quote = PM_HEREDOC_QUOTE_DOUBLE;
10346 }
10347 else if (match(parser, '\'')) {
10348 quote = PM_HEREDOC_QUOTE_SINGLE;
10349 }
10350
10351 const uint8_t *ident_start = parser->current.end;
10352 size_t width = 0;
10353
10354 if (parser->current.end >= parser->end) {
10355 parser->current.end = end;
10356 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10357 parser->current.end = end;
10358 } else {
10359 if (quote == PM_HEREDOC_QUOTE_NONE) {
10360 parser->current.end += width;
10361
10362 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10363 parser->current.end += width;
10364 }
10365 } else {
10366 // If we have quotes, then we're going to go until we find the
10367 // end quote.
10368 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10369 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10370 parser->current.end++;
10371 }
10372 }
10373
10374 size_t ident_length = (size_t) (parser->current.end - ident_start);
10375 bool ident_error = false;
10376
10377 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10378 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10379 ident_error = true;
10380 }
10381
10382 parser->explicit_encoding = NULL;
10383 lex_mode_push(parser, (pm_lex_mode_t) {
10384 .mode = PM_LEX_HEREDOC,
10385 .as.heredoc = {
10386 .base = {
10387 .ident_start = ident_start,
10388 .ident_length = ident_length,
10389 .quote = quote,
10390 .indent = indent
10391 },
10392 .next_start = parser->current.end,
10393 .common_whitespace = NULL,
10394 .line_continuation = false
10395 }
10396 });
10397
10398 if (parser->heredoc_end == NULL) {
10399 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10400
10401 if (body_start == NULL) {
10402 // If there is no newline after the heredoc identifier, then
10403 // this is not a valid heredoc declaration. In this case we
10404 // will add an error, but we will still return a heredoc
10405 // start.
10406 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10407 body_start = parser->end;
10408 } else {
10409 // Otherwise, we want to indicate that the body of the
10410 // heredoc starts on the character after the next newline.
10411 pm_line_offset_list_append(&parser->line_offsets, U32(body_start - parser->start + 1));
10412 body_start++;
10413 }
10414
10415 parser->next_start = body_start;
10416 } else {
10417 parser->next_start = parser->heredoc_end;
10418 }
10419
10420 LEX(PM_TOKEN_HEREDOC_START);
10421 }
10422 }
10423
10424 if (match(parser, '=')) {
10425 lex_state_set(parser, PM_LEX_STATE_BEG);
10426 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10427 }
10428
10429 if (ambiguous_operator_p(parser, space_seen)) {
10430 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10431 }
10432
10433 if (lex_state_operator_p(parser)) {
10434 lex_state_set(parser, PM_LEX_STATE_ARG);
10435 } else {
10436 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10437 lex_state_set(parser, PM_LEX_STATE_BEG);
10438 }
10439
10440 LEX(PM_TOKEN_LESS_LESS);
10441 }
10442
10443 if (lex_state_operator_p(parser)) {
10444 lex_state_set(parser, PM_LEX_STATE_ARG);
10445 } else {
10446 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10447 lex_state_set(parser, PM_LEX_STATE_BEG);
10448 }
10449
10450 if (match(parser, '=')) {
10451 if (match(parser, '>')) {
10452 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10453 }
10454
10455 LEX(PM_TOKEN_LESS_EQUAL);
10456 }
10457
10458 LEX(PM_TOKEN_LESS);
10459
10460 // > >> >>= >=
10461 case '>':
10462 if (match(parser, '>')) {
10463 if (lex_state_operator_p(parser)) {
10464 lex_state_set(parser, PM_LEX_STATE_ARG);
10465 } else {
10466 lex_state_set(parser, PM_LEX_STATE_BEG);
10467 }
10468 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10469 }
10470
10471 if (lex_state_operator_p(parser)) {
10472 lex_state_set(parser, PM_LEX_STATE_ARG);
10473 } else {
10474 lex_state_set(parser, PM_LEX_STATE_BEG);
10475 }
10476
10477 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10478
10479 // double-quoted string literal
10480 case '"': {
10481 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10482 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10483 LEX(PM_TOKEN_STRING_BEGIN);
10484 }
10485
10486 // xstring literal
10487 case '`': {
10488 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10489 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10490 LEX(PM_TOKEN_BACKTICK);
10491 }
10492
10493 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10494 if (previous_command_start) {
10495 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10496 } else {
10497 lex_state_set(parser, PM_LEX_STATE_ARG);
10498 }
10499
10500 LEX(PM_TOKEN_BACKTICK);
10501 }
10502
10503 lex_mode_push_string(parser, true, false, '\0', '`');
10504 LEX(PM_TOKEN_BACKTICK);
10505 }
10506
10507 // single-quoted string literal
10508 case '\'': {
10509 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10510 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10511 LEX(PM_TOKEN_STRING_BEGIN);
10512 }
10513
10514 // ? character literal
10515 case '?':
10516 LEX(lex_question_mark(parser));
10517
10518 // & && &&= &=
10519 case '&': {
10520 if (match(parser, '&')) {
10521 lex_state_set(parser, PM_LEX_STATE_BEG);
10522
10523 if (match(parser, '=')) {
10524 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10525 }
10526
10527 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10528 }
10529
10530 if (match(parser, '=')) {
10531 lex_state_set(parser, PM_LEX_STATE_BEG);
10532 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10533 }
10534
10535 if (match(parser, '.')) {
10536 lex_state_set(parser, PM_LEX_STATE_DOT);
10537 LEX(PM_TOKEN_AMPERSAND_DOT);
10538 }
10539
10540 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10541 if (lex_state_spcarg_p(parser, space_seen)) {
10542 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10543 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10544 } else {
10545 const uint8_t delim = peek_offset(parser, 1);
10546
10547 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10548 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10549 }
10550 }
10551
10552 type = PM_TOKEN_UAMPERSAND;
10553 } else if (lex_state_beg_p(parser)) {
10554 type = PM_TOKEN_UAMPERSAND;
10555 } else if (ambiguous_operator_p(parser, space_seen)) {
10556 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10557 }
10558
10559 if (lex_state_operator_p(parser)) {
10560 lex_state_set(parser, PM_LEX_STATE_ARG);
10561 } else {
10562 lex_state_set(parser, PM_LEX_STATE_BEG);
10563 }
10564
10565 LEX(type);
10566 }
10567
10568 // | || ||= |=
10569 case '|':
10570 if (match(parser, '|')) {
10571 if (match(parser, '=')) {
10572 lex_state_set(parser, PM_LEX_STATE_BEG);
10573 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10574 }
10575
10576 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10577 parser->current.end--;
10578 LEX(PM_TOKEN_PIPE);
10579 }
10580
10581 lex_state_set(parser, PM_LEX_STATE_BEG);
10582 LEX(PM_TOKEN_PIPE_PIPE);
10583 }
10584
10585 if (match(parser, '=')) {
10586 lex_state_set(parser, PM_LEX_STATE_BEG);
10587 LEX(PM_TOKEN_PIPE_EQUAL);
10588 }
10589
10590 if (lex_state_operator_p(parser)) {
10591 lex_state_set(parser, PM_LEX_STATE_ARG);
10592 } else {
10593 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10594 }
10595
10596 LEX(PM_TOKEN_PIPE);
10597
10598 // + += +@
10599 case '+': {
10600 if (lex_state_operator_p(parser)) {
10601 lex_state_set(parser, PM_LEX_STATE_ARG);
10602
10603 if (match(parser, '@')) {
10604 LEX(PM_TOKEN_UPLUS);
10605 }
10606
10607 LEX(PM_TOKEN_PLUS);
10608 }
10609
10610 if (match(parser, '=')) {
10611 lex_state_set(parser, PM_LEX_STATE_BEG);
10612 LEX(PM_TOKEN_PLUS_EQUAL);
10613 }
10614
10615 if (
10616 lex_state_beg_p(parser) ||
10617 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10618 ) {
10619 lex_state_set(parser, PM_LEX_STATE_BEG);
10620
10621 if (pm_char_is_decimal_digit(peek(parser))) {
10622 parser->current.end++;
10623 pm_token_type_t type = lex_numeric(parser);
10624 lex_state_set(parser, PM_LEX_STATE_END);
10625 LEX(type);
10626 }
10627
10628 LEX(PM_TOKEN_UPLUS);
10629 }
10630
10631 if (ambiguous_operator_p(parser, space_seen)) {
10632 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10633 }
10634
10635 lex_state_set(parser, PM_LEX_STATE_BEG);
10636 LEX(PM_TOKEN_PLUS);
10637 }
10638
10639 // - -= -@
10640 case '-': {
10641 if (lex_state_operator_p(parser)) {
10642 lex_state_set(parser, PM_LEX_STATE_ARG);
10643
10644 if (match(parser, '@')) {
10645 LEX(PM_TOKEN_UMINUS);
10646 }
10647
10648 LEX(PM_TOKEN_MINUS);
10649 }
10650
10651 if (match(parser, '=')) {
10652 lex_state_set(parser, PM_LEX_STATE_BEG);
10653 LEX(PM_TOKEN_MINUS_EQUAL);
10654 }
10655
10656 if (match(parser, '>')) {
10657 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10658 LEX(PM_TOKEN_MINUS_GREATER);
10659 }
10660
10661 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10662 bool is_beg = lex_state_beg_p(parser);
10663 if (!is_beg && spcarg) {
10664 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10665 }
10666
10667 if (is_beg || spcarg) {
10668 lex_state_set(parser, PM_LEX_STATE_BEG);
10669 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10670 }
10671
10672 if (ambiguous_operator_p(parser, space_seen)) {
10673 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10674 }
10675
10676 lex_state_set(parser, PM_LEX_STATE_BEG);
10677 LEX(PM_TOKEN_MINUS);
10678 }
10679
10680 // . .. ...
10681 case '.': {
10682 bool beg_p = lex_state_beg_p(parser);
10683
10684 if (match(parser, '.')) {
10685 if (match(parser, '.')) {
10686 // If we're _not_ inside a range within default parameters
10687 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10688 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10689 lex_state_set(parser, PM_LEX_STATE_BEG);
10690 } else {
10691 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10692 }
10693 LEX(PM_TOKEN_UDOT_DOT_DOT);
10694 }
10695
10696 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10697 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10698 }
10699
10700 lex_state_set(parser, PM_LEX_STATE_BEG);
10701 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10702 }
10703
10704 lex_state_set(parser, PM_LEX_STATE_BEG);
10705 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10706 }
10707
10708 lex_state_set(parser, PM_LEX_STATE_DOT);
10709 LEX(PM_TOKEN_DOT);
10710 }
10711
10712 // integer
10713 case '0':
10714 case '1':
10715 case '2':
10716 case '3':
10717 case '4':
10718 case '5':
10719 case '6':
10720 case '7':
10721 case '8':
10722 case '9': {
10723 pm_token_type_t type = lex_numeric(parser);
10724 lex_state_set(parser, PM_LEX_STATE_END);
10725 LEX(type);
10726 }
10727
10728 // :: symbol
10729 case ':':
10730 if (match(parser, ':')) {
10731 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10732 lex_state_set(parser, PM_LEX_STATE_BEG);
10733 LEX(PM_TOKEN_UCOLON_COLON);
10734 }
10735
10736 lex_state_set(parser, PM_LEX_STATE_DOT);
10737 LEX(PM_TOKEN_COLON_COLON);
10738 }
10739
10740 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10741 lex_state_set(parser, PM_LEX_STATE_BEG);
10742 LEX(PM_TOKEN_COLON);
10743 }
10744
10745 if (peek(parser) == '"' || peek(parser) == '\'') {
10746 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10747 parser->current.end++;
10748 }
10749
10750 lex_state_set(parser, PM_LEX_STATE_FNAME);
10751 LEX(PM_TOKEN_SYMBOL_BEGIN);
10752
10753 // / /=
10754 case '/':
10755 if (lex_state_beg_p(parser)) {
10756 lex_mode_push_regexp(parser, '\0', '/');
10757 LEX(PM_TOKEN_REGEXP_BEGIN);
10758 }
10759
10760 if (match(parser, '=')) {
10761 lex_state_set(parser, PM_LEX_STATE_BEG);
10762 LEX(PM_TOKEN_SLASH_EQUAL);
10763 }
10764
10765 if (lex_state_spcarg_p(parser, space_seen)) {
10766 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10767 lex_mode_push_regexp(parser, '\0', '/');
10768 LEX(PM_TOKEN_REGEXP_BEGIN);
10769 }
10770
10771 if (ambiguous_operator_p(parser, space_seen)) {
10772 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10773 }
10774
10775 if (lex_state_operator_p(parser)) {
10776 lex_state_set(parser, PM_LEX_STATE_ARG);
10777 } else {
10778 lex_state_set(parser, PM_LEX_STATE_BEG);
10779 }
10780
10781 LEX(PM_TOKEN_SLASH);
10782
10783 // ^ ^=
10784 case '^':
10785 if (lex_state_operator_p(parser)) {
10786 lex_state_set(parser, PM_LEX_STATE_ARG);
10787 } else {
10788 lex_state_set(parser, PM_LEX_STATE_BEG);
10789 }
10790 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10791
10792 // ~ ~@
10793 case '~':
10794 if (lex_state_operator_p(parser)) {
10795 (void) match(parser, '@');
10796 lex_state_set(parser, PM_LEX_STATE_ARG);
10797 } else {
10798 lex_state_set(parser, PM_LEX_STATE_BEG);
10799 }
10800
10801 LEX(PM_TOKEN_TILDE);
10802
10803 // % %= %i %I %q %Q %w %W
10804 case '%': {
10805 // If there is no subsequent character then we have an
10806 // invalid token. We're going to say it's the percent
10807 // operator because we don't want to move into the string
10808 // lex mode unnecessarily.
10809 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10810 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10811 LEX(PM_TOKEN_PERCENT);
10812 }
10813
10814 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10815 lex_state_set(parser, PM_LEX_STATE_BEG);
10816 LEX(PM_TOKEN_PERCENT_EQUAL);
10817 } else if (
10818 lex_state_beg_p(parser) ||
10819 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10820 lex_state_spcarg_p(parser, space_seen)
10821 ) {
10822 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10823 if (*parser->current.end >= 0x80) {
10824 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10825 }
10826
10827 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10828 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10829 LEX(PM_TOKEN_STRING_BEGIN);
10830 }
10831
10832 // Delimiters for %-literals cannot be alphanumeric. We
10833 // validate that here.
10834 uint8_t delimiter = peek_offset(parser, 1);
10835 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10836 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10837 goto lex_next_token;
10838 }
10839
10840 switch (peek(parser)) {
10841 case 'i': {
10842 parser->current.end++;
10843
10844 if (parser->current.end < parser->end) {
10845 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10846 } else {
10847 lex_mode_push_list_eof(parser);
10848 }
10849
10850 LEX(PM_TOKEN_PERCENT_LOWER_I);
10851 }
10852 case 'I': {
10853 parser->current.end++;
10854
10855 if (parser->current.end < parser->end) {
10856 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10857 } else {
10858 lex_mode_push_list_eof(parser);
10859 }
10860
10861 LEX(PM_TOKEN_PERCENT_UPPER_I);
10862 }
10863 case 'r': {
10864 parser->current.end++;
10865
10866 if (parser->current.end < parser->end) {
10867 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10868 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10869 } else {
10870 lex_mode_push_regexp(parser, '\0', '\0');
10871 }
10872
10873 LEX(PM_TOKEN_REGEXP_BEGIN);
10874 }
10875 case 'q': {
10876 parser->current.end++;
10877
10878 if (parser->current.end < parser->end) {
10879 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10880 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10881 } else {
10882 lex_mode_push_string_eof(parser);
10883 }
10884
10885 LEX(PM_TOKEN_STRING_BEGIN);
10886 }
10887 case 'Q': {
10888 parser->current.end++;
10889
10890 if (parser->current.end < parser->end) {
10891 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10892 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10893 } else {
10894 lex_mode_push_string_eof(parser);
10895 }
10896
10897 LEX(PM_TOKEN_STRING_BEGIN);
10898 }
10899 case 's': {
10900 parser->current.end++;
10901
10902 if (parser->current.end < parser->end) {
10903 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10904 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10905 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10906 } else {
10907 lex_mode_push_string_eof(parser);
10908 }
10909
10910 LEX(PM_TOKEN_SYMBOL_BEGIN);
10911 }
10912 case 'w': {
10913 parser->current.end++;
10914
10915 if (parser->current.end < parser->end) {
10916 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10917 } else {
10918 lex_mode_push_list_eof(parser);
10919 }
10920
10921 LEX(PM_TOKEN_PERCENT_LOWER_W);
10922 }
10923 case 'W': {
10924 parser->current.end++;
10925
10926 if (parser->current.end < parser->end) {
10927 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10928 } else {
10929 lex_mode_push_list_eof(parser);
10930 }
10931
10932 LEX(PM_TOKEN_PERCENT_UPPER_W);
10933 }
10934 case 'x': {
10935 parser->current.end++;
10936
10937 if (parser->current.end < parser->end) {
10938 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10939 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10940 } else {
10941 lex_mode_push_string_eof(parser);
10942 }
10943
10944 LEX(PM_TOKEN_PERCENT_LOWER_X);
10945 }
10946 default:
10947 // If we get to this point, then we have a % that is completely
10948 // unparsable. In this case we'll just drop it from the parser
10949 // and skip past it and hope that the next token is something
10950 // that we can parse.
10951 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10952 goto lex_next_token;
10953 }
10954 }
10955
10956 if (ambiguous_operator_p(parser, space_seen)) {
10957 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
10958 }
10959
10960 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10961 LEX(PM_TOKEN_PERCENT);
10962 }
10963
10964 // global variable
10965 case '$': {
10966 pm_token_type_t type = lex_global_variable(parser);
10967
10968 // If we're lexing an embedded variable, then we need to pop back into
10969 // the parent lex context.
10970 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10971 lex_mode_pop(parser);
10972 }
10973
10974 lex_state_set(parser, PM_LEX_STATE_END);
10975 LEX(type);
10976 }
10977
10978 // instance variable, class variable
10979 case '@':
10980 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10981 LEX(lex_at_variable(parser));
10982
10983 default: {
10984 if (*parser->current.start != '_') {
10985 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
10986
10987 // If this isn't the beginning of an identifier, then
10988 // it's an invalid token as we've exhausted all of the
10989 // other options. We'll skip past it and return the next
10990 // token after adding an appropriate error message.
10991 if (!width) {
10992 if (*parser->current.start >= 0x80) {
10993 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
10994 } else if (*parser->current.start == '\\') {
10995 switch (peek_at(parser, parser->current.start + 1)) {
10996 case ' ':
10997 parser->current.end++;
10998 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
10999 break;
11000 case '\f':
11001 parser->current.end++;
11002 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11003 break;
11004 case '\t':
11005 parser->current.end++;
11006 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11007 break;
11008 case '\v':
11009 parser->current.end++;
11010 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11011 break;
11012 case '\r':
11013 if (peek_at(parser, parser->current.start + 2) != '\n') {
11014 parser->current.end++;
11015 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11016 break;
11017 }
11019 default:
11020 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11021 break;
11022 }
11023 } else if (char_is_ascii_printable(*parser->current.start)) {
11024 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11025 } else {
11026 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11027 }
11028
11029 goto lex_next_token;
11030 }
11031
11032 parser->current.end = parser->current.start + width;
11033 }
11034
11035 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11036
11037 // If we've hit a __END__ and it was at the start of the
11038 // line or the start of the file and it is followed by
11039 // either a \n or a \r\n, then this is the last token of the
11040 // file.
11041 if (
11042 ((parser->current.end - parser->current.start) == 7) &&
11043 current_token_starts_line(parser) &&
11044 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11045 (parser->current.end == parser->end || match_eol(parser))
11046 ) {
11047 // Since we know we're about to add an __END__ comment,
11048 // we know we need to add all of the newlines to get the
11049 // correct column information for it.
11050 const uint8_t *cursor = parser->current.end;
11051 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11052 pm_line_offset_list_append(&parser->line_offsets, U32(++cursor - parser->start));
11053 }
11054
11055 parser->current.end = parser->end;
11056 parser->current.type = PM_TOKEN___END__;
11057 parser_lex_callback(parser);
11058
11059 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
11060 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
11061
11062 LEX(PM_TOKEN_EOF);
11063 }
11064
11065 pm_lex_state_t last_state = parser->lex_state;
11066
11067 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11068 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11069 if (previous_command_start) {
11070 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11071 } else {
11072 lex_state_set(parser, PM_LEX_STATE_ARG);
11073 }
11074 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11075 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11076 } else {
11077 lex_state_set(parser, PM_LEX_STATE_END);
11078 }
11079 }
11080
11081 if (
11082 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11083 (type == PM_TOKEN_IDENTIFIER) &&
11084 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11085 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
11086 ) {
11087 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11088 }
11089
11090 LEX(type);
11091 }
11092 }
11093 }
11094 case PM_LEX_LIST: {
11095 if (parser->next_start != NULL) {
11096 parser->current.end = parser->next_start;
11097 parser->next_start = NULL;
11098 }
11099
11100 // First we'll set the beginning of the token.
11101 parser->current.start = parser->current.end;
11102
11103 // If there's any whitespace at the start of the list, then we're
11104 // going to trim it off the beginning and create a new token.
11105 size_t whitespace;
11106
11107 if (parser->heredoc_end) {
11108 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11109 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11110 whitespace += 1;
11111 }
11112 } else {
11113 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11114 }
11115
11116 if (whitespace > 0) {
11117 parser->current.end += whitespace;
11118 if (peek_offset(parser, -1) == '\n') {
11119 // mutates next_start
11120 parser_flush_heredoc_end(parser);
11121 }
11122 LEX(PM_TOKEN_WORDS_SEP);
11123 }
11124
11125 // We'll check if we're at the end of the file. If we are, then we
11126 // need to return the EOF token.
11127 if (parser->current.end >= parser->end) {
11128 LEX(PM_TOKEN_EOF);
11129 }
11130
11131 // Here we'll get a list of the places where strpbrk should break,
11132 // and then find the first one.
11133 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11134 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11135 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11136
11137 // If we haven't found an escape yet, then this buffer will be
11138 // unallocated since we can refer directly to the source string.
11139 pm_token_buffer_t token_buffer = { 0 };
11140
11141 while (breakpoint != NULL) {
11142 // If we hit whitespace, then we must have received content by
11143 // now, so we can return an element of the list.
11144 if (pm_char_is_whitespace(*breakpoint)) {
11145 parser->current.end = breakpoint;
11146 pm_token_buffer_flush(parser, &token_buffer);
11147 LEX(PM_TOKEN_STRING_CONTENT);
11148 }
11149
11150 // If we hit the terminator, we need to check which token to
11151 // return.
11152 if (*breakpoint == lex_mode->as.list.terminator) {
11153 // If this terminator doesn't actually close the list, then
11154 // we need to continue on past it.
11155 if (lex_mode->as.list.nesting > 0) {
11156 parser->current.end = breakpoint + 1;
11157 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11158 lex_mode->as.list.nesting--;
11159 continue;
11160 }
11161
11162 // If we've hit the terminator and we've already skipped
11163 // past content, then we can return a list node.
11164 if (breakpoint > parser->current.start) {
11165 parser->current.end = breakpoint;
11166 pm_token_buffer_flush(parser, &token_buffer);
11167 LEX(PM_TOKEN_STRING_CONTENT);
11168 }
11169
11170 // Otherwise, switch back to the default state and return
11171 // the end of the list.
11172 parser->current.end = breakpoint + 1;
11173 lex_mode_pop(parser);
11174 lex_state_set(parser, PM_LEX_STATE_END);
11175 LEX(PM_TOKEN_STRING_END);
11176 }
11177
11178 // If we hit a null byte, skip directly past it.
11179 if (*breakpoint == '\0') {
11180 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11181 continue;
11182 }
11183
11184 // If we hit escapes, then we need to treat the next token
11185 // literally. In this case we'll skip past the next character
11186 // and find the next breakpoint.
11187 if (*breakpoint == '\\') {
11188 parser->current.end = breakpoint + 1;
11189
11190 // If we've hit the end of the file, then break out of the
11191 // loop by setting the breakpoint to NULL.
11192 if (parser->current.end == parser->end) {
11193 breakpoint = NULL;
11194 continue;
11195 }
11196
11197 pm_token_buffer_escape(parser, &token_buffer);
11198 uint8_t peeked = peek(parser);
11199
11200 switch (peeked) {
11201 case ' ':
11202 case '\f':
11203 case '\t':
11204 case '\v':
11205 case '\\':
11206 pm_token_buffer_push_byte(&token_buffer, peeked);
11207 parser->current.end++;
11208 break;
11209 case '\r':
11210 parser->current.end++;
11211 if (peek(parser) != '\n') {
11212 pm_token_buffer_push_byte(&token_buffer, '\r');
11213 break;
11214 }
11216 case '\n':
11217 pm_token_buffer_push_byte(&token_buffer, '\n');
11218
11219 if (parser->heredoc_end) {
11220 // ... if we are on the same line as a heredoc,
11221 // flush the heredoc and continue parsing after
11222 // heredoc_end.
11223 parser_flush_heredoc_end(parser);
11224 pm_token_buffer_copy(parser, &token_buffer);
11225 LEX(PM_TOKEN_STRING_CONTENT);
11226 } else {
11227 // ... else track the newline.
11228 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11229 }
11230
11231 parser->current.end++;
11232 break;
11233 default:
11234 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11235 pm_token_buffer_push_byte(&token_buffer, peeked);
11236 parser->current.end++;
11237 } else if (lex_mode->as.list.interpolation) {
11238 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11239 } else {
11240 pm_token_buffer_push_byte(&token_buffer, '\\');
11241 pm_token_buffer_push_escaped(&token_buffer, parser);
11242 }
11243
11244 break;
11245 }
11246
11247 token_buffer.cursor = parser->current.end;
11248 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11249 continue;
11250 }
11251
11252 // If we hit a #, then we will attempt to lex interpolation.
11253 if (*breakpoint == '#') {
11254 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11255
11256 if (!type) {
11257 // If we haven't returned at this point then we had something
11258 // that looked like an interpolated class or instance variable
11259 // like "#@" but wasn't actually. In this case we'll just skip
11260 // to the next breakpoint.
11261 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11262 continue;
11263 }
11264
11265 if (type == PM_TOKEN_STRING_CONTENT) {
11266 pm_token_buffer_flush(parser, &token_buffer);
11267 }
11268
11269 LEX(type);
11270 }
11271
11272 // If we've hit the incrementor, then we need to skip past it
11273 // and find the next breakpoint.
11274 assert(*breakpoint == lex_mode->as.list.incrementor);
11275 parser->current.end = breakpoint + 1;
11276 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11277 lex_mode->as.list.nesting++;
11278 continue;
11279 }
11280
11281 if (parser->current.end > parser->current.start) {
11282 pm_token_buffer_flush(parser, &token_buffer);
11283 LEX(PM_TOKEN_STRING_CONTENT);
11284 }
11285
11286 // If we were unable to find a breakpoint, then this token hits the
11287 // end of the file.
11288 parser->current.end = parser->end;
11289 pm_token_buffer_flush(parser, &token_buffer);
11290 LEX(PM_TOKEN_STRING_CONTENT);
11291 }
11292 case PM_LEX_REGEXP: {
11293 // First, we'll set the start of this token to be the current end.
11294 if (parser->next_start == NULL) {
11295 parser->current.start = parser->current.end;
11296 } else {
11297 parser->current.start = parser->next_start;
11298 parser->current.end = parser->next_start;
11299 parser->next_start = NULL;
11300 }
11301
11302 // We'll check if we're at the end of the file. If we are, then we
11303 // need to return the EOF token.
11304 if (parser->current.end >= parser->end) {
11305 LEX(PM_TOKEN_EOF);
11306 }
11307
11308 // Get a reference to the current mode.
11309 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11310
11311 // These are the places where we need to split up the content of the
11312 // regular expression. We'll use strpbrk to find the first of these
11313 // characters.
11314 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11315 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11316 pm_regexp_token_buffer_t token_buffer = { 0 };
11317
11318 while (breakpoint != NULL) {
11319 uint8_t term = lex_mode->as.regexp.terminator;
11320 bool is_terminator = (*breakpoint == term);
11321
11322 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
11323 // For example: `%r\nfoo\r\n`
11324 // The regular expression should be /foo/, not /foo\r/
11325 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11326 if (term == '\n') {
11327 is_terminator = true;
11328 }
11329
11330 // If the terminator is a CR, but we see a CRLF, we need to
11331 // treat the CRLF as a newline, meaning this is _not_ the
11332 // terminator
11333 if (term == '\r') {
11334 is_terminator = false;
11335 }
11336 }
11337
11338 // If we hit the terminator, we need to determine what kind of
11339 // token to return.
11340 if (is_terminator) {
11341 if (lex_mode->as.regexp.nesting > 0) {
11342 parser->current.end = breakpoint + 1;
11343 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11344 lex_mode->as.regexp.nesting--;
11345 continue;
11346 }
11347
11348 // Here we've hit the terminator. If we have already consumed
11349 // content then we need to return that content as string content
11350 // first.
11351 if (breakpoint > parser->current.start) {
11352 parser->current.end = breakpoint;
11353 pm_regexp_token_buffer_flush(parser, &token_buffer);
11354 LEX(PM_TOKEN_STRING_CONTENT);
11355 }
11356
11357 // Check here if we need to track the newline.
11358 size_t eol_length = match_eol_at(parser, breakpoint);
11359 if (eol_length) {
11360 parser->current.end = breakpoint + eol_length;
11361
11362 // Track the newline if we're not in a heredoc that
11363 // would have already added the newline to the
11364 // list.
11365 if (parser->heredoc_end == NULL) {
11366 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11367 }
11368 } else {
11369 parser->current.end = breakpoint + 1;
11370 }
11371
11372 // Since we've hit the terminator of the regular expression,
11373 // we now need to parse the options.
11374 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11375
11376 lex_mode_pop(parser);
11377 lex_state_set(parser, PM_LEX_STATE_END);
11378 LEX(PM_TOKEN_REGEXP_END);
11379 }
11380
11381 // If we've hit the incrementor, then we need to skip past it
11382 // and find the next breakpoint.
11383 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11384 parser->current.end = breakpoint + 1;
11385 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11386 lex_mode->as.regexp.nesting++;
11387 continue;
11388 }
11389
11390 switch (*breakpoint) {
11391 case '\0':
11392 // If we hit a null byte, skip directly past it.
11393 parser->current.end = breakpoint + 1;
11394 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11395 break;
11396 case '\r':
11397 if (peek_at(parser, breakpoint + 1) != '\n') {
11398 parser->current.end = breakpoint + 1;
11399 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11400 break;
11401 }
11402
11403 breakpoint++;
11404 parser->current.end = breakpoint;
11405 pm_regexp_token_buffer_escape(parser, &token_buffer);
11406 token_buffer.base.cursor = breakpoint;
11407
11409 case '\n':
11410 // If we've hit a newline, then we need to track that in
11411 // the list of newlines.
11412 if (parser->heredoc_end == NULL) {
11413 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11414 parser->current.end = breakpoint + 1;
11415 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11416 break;
11417 }
11418
11419 parser->current.end = breakpoint + 1;
11420 parser_flush_heredoc_end(parser);
11421 pm_regexp_token_buffer_flush(parser, &token_buffer);
11422 LEX(PM_TOKEN_STRING_CONTENT);
11423 case '\\': {
11424 // If we hit escapes, then we need to treat the next
11425 // token literally. In this case we'll skip past the
11426 // next character and find the next breakpoint.
11427 parser->current.end = breakpoint + 1;
11428
11429 // If we've hit the end of the file, then break out of
11430 // the loop by setting the breakpoint to NULL.
11431 if (parser->current.end == parser->end) {
11432 breakpoint = NULL;
11433 break;
11434 }
11435
11436 pm_regexp_token_buffer_escape(parser, &token_buffer);
11437 uint8_t peeked = peek(parser);
11438
11439 switch (peeked) {
11440 case '\r':
11441 parser->current.end++;
11442 if (peek(parser) != '\n') {
11443 if (lex_mode->as.regexp.terminator != '\r') {
11444 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11445 }
11446 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11447 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11448 break;
11449 }
11451 case '\n':
11452 if (parser->heredoc_end) {
11453 // ... if we are on the same line as a heredoc,
11454 // flush the heredoc and continue parsing after
11455 // heredoc_end.
11456 parser_flush_heredoc_end(parser);
11457 pm_regexp_token_buffer_copy(parser, &token_buffer);
11458 LEX(PM_TOKEN_STRING_CONTENT);
11459 } else {
11460 // ... else track the newline.
11461 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11462 }
11463
11464 parser->current.end++;
11465 break;
11466 case 'c':
11467 case 'C':
11468 case 'M':
11469 case 'u':
11470 case 'x':
11471 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11472 break;
11473 default:
11474 if (lex_mode->as.regexp.terminator == peeked) {
11475 // Some characters when they are used as the
11476 // terminator also receive an escape. They are
11477 // enumerated here.
11478 switch (peeked) {
11479 case '$': case ')': case '*': case '+':
11480 case '.': case '>': case '?': case ']':
11481 case '^': case '|': case '}':
11482 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11483 break;
11484 default:
11485 break;
11486 }
11487
11488 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11489 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11490 parser->current.end++;
11491 break;
11492 }
11493
11494 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11495 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11496 break;
11497 }
11498
11499 token_buffer.base.cursor = parser->current.end;
11500 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11501 break;
11502 }
11503 case '#': {
11504 // If we hit a #, then we will attempt to lex
11505 // interpolation.
11506 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11507
11508 if (!type) {
11509 // If we haven't returned at this point then we had
11510 // something that looked like an interpolated class or
11511 // instance variable like "#@" but wasn't actually. In
11512 // this case we'll just skip to the next breakpoint.
11513 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11514 break;
11515 }
11516
11517 if (type == PM_TOKEN_STRING_CONTENT) {
11518 pm_regexp_token_buffer_flush(parser, &token_buffer);
11519 }
11520
11521 LEX(type);
11522 }
11523 default:
11524 assert(false && "unreachable");
11525 break;
11526 }
11527 }
11528
11529 if (parser->current.end > parser->current.start) {
11530 pm_regexp_token_buffer_flush(parser, &token_buffer);
11531 LEX(PM_TOKEN_STRING_CONTENT);
11532 }
11533
11534 // If we were unable to find a breakpoint, then this token hits the
11535 // end of the file.
11536 parser->current.end = parser->end;
11537 pm_regexp_token_buffer_flush(parser, &token_buffer);
11538 LEX(PM_TOKEN_STRING_CONTENT);
11539 }
11540 case PM_LEX_STRING: {
11541 // First, we'll set the start of this token to be the current end.
11542 if (parser->next_start == NULL) {
11543 parser->current.start = parser->current.end;
11544 } else {
11545 parser->current.start = parser->next_start;
11546 parser->current.end = parser->next_start;
11547 parser->next_start = NULL;
11548 }
11549
11550 // We'll check if we're at the end of the file. If we are, then we need to
11551 // return the EOF token.
11552 if (parser->current.end >= parser->end) {
11553 LEX(PM_TOKEN_EOF);
11554 }
11555
11556 // These are the places where we need to split up the content of the
11557 // string. We'll use strpbrk to find the first of these characters.
11558 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11559 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11560 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11561
11562 // If we haven't found an escape yet, then this buffer will be
11563 // unallocated since we can refer directly to the source string.
11564 pm_token_buffer_t token_buffer = { 0 };
11565
11566 while (breakpoint != NULL) {
11567 // If we hit the incrementor, then we'll increment the nesting and
11568 // continue lexing.
11569 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11570 lex_mode->as.string.nesting++;
11571 parser->current.end = breakpoint + 1;
11572 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11573 continue;
11574 }
11575
11576 uint8_t term = lex_mode->as.string.terminator;
11577 bool is_terminator = (*breakpoint == term);
11578
11579 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
11580 // For example: `%\nfoo\r\n`
11581 // The string should be "foo", not "foo\r"
11582 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11583 if (term == '\n') {
11584 is_terminator = true;
11585 }
11586
11587 // If the terminator is a CR, but we see a CRLF, we need to
11588 // treat the CRLF as a newline, meaning this is _not_ the
11589 // terminator
11590 if (term == '\r') {
11591 is_terminator = false;
11592 }
11593 }
11594
11595 // Note that we have to check the terminator here first because we could
11596 // potentially be parsing a % string that has a # character as the
11597 // terminator.
11598 if (is_terminator) {
11599 // If this terminator doesn't actually close the string, then we need
11600 // to continue on past it.
11601 if (lex_mode->as.string.nesting > 0) {
11602 parser->current.end = breakpoint + 1;
11603 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11604 lex_mode->as.string.nesting--;
11605 continue;
11606 }
11607
11608 // Here we've hit the terminator. If we have already consumed content
11609 // then we need to return that content as string content first.
11610 if (breakpoint > parser->current.start) {
11611 parser->current.end = breakpoint;
11612 pm_token_buffer_flush(parser, &token_buffer);
11613 LEX(PM_TOKEN_STRING_CONTENT);
11614 }
11615
11616 // Otherwise we need to switch back to the parent lex mode and
11617 // return the end of the string.
11618 size_t eol_length = match_eol_at(parser, breakpoint);
11619 if (eol_length) {
11620 parser->current.end = breakpoint + eol_length;
11621
11622 // Track the newline if we're not in a heredoc that
11623 // would have already added the newline to the
11624 // list.
11625 if (parser->heredoc_end == NULL) {
11626 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11627 }
11628 } else {
11629 parser->current.end = breakpoint + 1;
11630 }
11631
11632 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11633 parser->current.end++;
11634 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11635 lex_mode_pop(parser);
11636 LEX(PM_TOKEN_LABEL_END);
11637 }
11638
11639 // When the delimiter itself is a newline, we won't
11640 // get a chance to flush heredocs in the usual places since
11641 // the newline is already consumed.
11642 if (term == '\n' && parser->heredoc_end) {
11643 parser_flush_heredoc_end(parser);
11644 }
11645
11646 lex_state_set(parser, PM_LEX_STATE_END);
11647 lex_mode_pop(parser);
11648 LEX(PM_TOKEN_STRING_END);
11649 }
11650
11651 switch (*breakpoint) {
11652 case '\0':
11653 // Skip directly past the null character.
11654 parser->current.end = breakpoint + 1;
11655 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11656 break;
11657 case '\r':
11658 if (peek_at(parser, breakpoint + 1) != '\n') {
11659 parser->current.end = breakpoint + 1;
11660 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11661 break;
11662 }
11663
11664 // If we hit a \r\n sequence, then we need to treat it
11665 // as a newline.
11666 breakpoint++;
11667 parser->current.end = breakpoint;
11668 pm_token_buffer_escape(parser, &token_buffer);
11669 token_buffer.cursor = breakpoint;
11670
11672 case '\n':
11673 // When we hit a newline, we need to flush any potential
11674 // heredocs. Note that this has to happen after we check
11675 // for the terminator in case the terminator is a
11676 // newline character.
11677 if (parser->heredoc_end == NULL) {
11678 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11679 parser->current.end = breakpoint + 1;
11680 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11681 break;
11682 }
11683
11684 parser->current.end = breakpoint + 1;
11685 parser_flush_heredoc_end(parser);
11686 pm_token_buffer_flush(parser, &token_buffer);
11687 LEX(PM_TOKEN_STRING_CONTENT);
11688 case '\\': {
11689 // Here we hit escapes.
11690 parser->current.end = breakpoint + 1;
11691
11692 // If we've hit the end of the file, then break out of
11693 // the loop by setting the breakpoint to NULL.
11694 if (parser->current.end == parser->end) {
11695 breakpoint = NULL;
11696 continue;
11697 }
11698
11699 pm_token_buffer_escape(parser, &token_buffer);
11700 uint8_t peeked = peek(parser);
11701
11702 switch (peeked) {
11703 case '\\':
11704 pm_token_buffer_push_byte(&token_buffer, '\\');
11705 parser->current.end++;
11706 break;
11707 case '\r':
11708 parser->current.end++;
11709 if (peek(parser) != '\n') {
11710 if (!lex_mode->as.string.interpolation) {
11711 pm_token_buffer_push_byte(&token_buffer, '\\');
11712 }
11713 pm_token_buffer_push_byte(&token_buffer, '\r');
11714 break;
11715 }
11717 case '\n':
11718 if (!lex_mode->as.string.interpolation) {
11719 pm_token_buffer_push_byte(&token_buffer, '\\');
11720 pm_token_buffer_push_byte(&token_buffer, '\n');
11721 }
11722
11723 if (parser->heredoc_end) {
11724 // ... if we are on the same line as a heredoc,
11725 // flush the heredoc and continue parsing after
11726 // heredoc_end.
11727 parser_flush_heredoc_end(parser);
11728 pm_token_buffer_copy(parser, &token_buffer);
11729 LEX(PM_TOKEN_STRING_CONTENT);
11730 } else {
11731 // ... else track the newline.
11732 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11733 }
11734
11735 parser->current.end++;
11736 break;
11737 default:
11738 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11739 pm_token_buffer_push_byte(&token_buffer, peeked);
11740 parser->current.end++;
11741 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11742 pm_token_buffer_push_byte(&token_buffer, peeked);
11743 parser->current.end++;
11744 } else if (lex_mode->as.string.interpolation) {
11745 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11746 } else {
11747 pm_token_buffer_push_byte(&token_buffer, '\\');
11748 pm_token_buffer_push_escaped(&token_buffer, parser);
11749 }
11750
11751 break;
11752 }
11753
11754 token_buffer.cursor = parser->current.end;
11755 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11756 break;
11757 }
11758 case '#': {
11759 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11760
11761 if (!type) {
11762 // If we haven't returned at this point then we had something that
11763 // looked like an interpolated class or instance variable like "#@"
11764 // but wasn't actually. In this case we'll just skip to the next
11765 // breakpoint.
11766 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11767 break;
11768 }
11769
11770 if (type == PM_TOKEN_STRING_CONTENT) {
11771 pm_token_buffer_flush(parser, &token_buffer);
11772 }
11773
11774 LEX(type);
11775 }
11776 default:
11777 assert(false && "unreachable");
11778 }
11779 }
11780
11781 if (parser->current.end > parser->current.start) {
11782 pm_token_buffer_flush(parser, &token_buffer);
11783 LEX(PM_TOKEN_STRING_CONTENT);
11784 }
11785
11786 // If we've hit the end of the string, then this is an unterminated
11787 // string. In that case we'll return a string content token.
11788 parser->current.end = parser->end;
11789 pm_token_buffer_flush(parser, &token_buffer);
11790 LEX(PM_TOKEN_STRING_CONTENT);
11791 }
11792 case PM_LEX_HEREDOC: {
11793 // First, we'll set the start of this token.
11794 if (parser->next_start == NULL) {
11795 parser->current.start = parser->current.end;
11796 } else {
11797 parser->current.start = parser->next_start;
11798 parser->current.end = parser->next_start;
11799 parser->heredoc_end = NULL;
11800 parser->next_start = NULL;
11801 }
11802
11803 // Now let's grab the information about the identifier off of the
11804 // current lex mode.
11805 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11806 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11807
11808 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11809 lex_mode->as.heredoc.line_continuation = false;
11810
11811 // We'll check if we're at the end of the file. If we are, then we
11812 // will add an error (because we weren't able to find the
11813 // terminator) but still continue parsing so that content after the
11814 // declaration of the heredoc can be parsed.
11815 if (parser->current.end >= parser->end) {
11816 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11817 parser->next_start = lex_mode->as.heredoc.next_start;
11818 parser->heredoc_end = parser->current.end;
11819 lex_state_set(parser, PM_LEX_STATE_END);
11820 lex_mode_pop(parser);
11821 LEX(PM_TOKEN_HEREDOC_END);
11822 }
11823
11824 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11825 size_t ident_length = heredoc_lex_mode->ident_length;
11826
11827 // If we are immediately following a newline and we have hit the
11828 // terminator, then we need to return the ending of the heredoc.
11829 if (current_token_starts_line(parser)) {
11830 const uint8_t *start = parser->current.start;
11831
11832 if (!line_continuation && (start + ident_length <= parser->end)) {
11833 const uint8_t *newline = next_newline(start, parser->end - start);
11834 const uint8_t *ident_end = newline;
11835 const uint8_t *terminator_end = newline;
11836
11837 if (newline == NULL) {
11838 terminator_end = parser->end;
11839 ident_end = parser->end;
11840 } else {
11841 terminator_end++;
11842 if (newline[-1] == '\r') {
11843 ident_end--; // Remove \r
11844 }
11845 }
11846
11847 const uint8_t *terminator_start = ident_end - ident_length;
11848 const uint8_t *cursor = start;
11849
11850 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11851 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11852 cursor++;
11853 }
11854 }
11855
11856 if (
11857 (cursor == terminator_start) &&
11858 (memcmp(terminator_start, ident_start, ident_length) == 0)
11859 ) {
11860 if (newline != NULL) {
11861 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
11862 }
11863
11864 parser->current.end = terminator_end;
11865 if (*lex_mode->as.heredoc.next_start == '\\') {
11866 parser->next_start = NULL;
11867 } else {
11868 parser->next_start = lex_mode->as.heredoc.next_start;
11869 parser->heredoc_end = parser->current.end;
11870 }
11871
11872 lex_state_set(parser, PM_LEX_STATE_END);
11873 lex_mode_pop(parser);
11874 LEX(PM_TOKEN_HEREDOC_END);
11875 }
11876 }
11877
11878 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11879 if (
11880 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11881 lex_mode->as.heredoc.common_whitespace != NULL &&
11882 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11883 peek_at(parser, start) != '\n'
11884 ) {
11885 *lex_mode->as.heredoc.common_whitespace = whitespace;
11886 }
11887 }
11888
11889 // Otherwise we'll be parsing string content. These are the places
11890 // where we need to split up the content of the heredoc. We'll use
11891 // strpbrk to find the first of these characters.
11892 uint8_t breakpoints[] = "\r\n\\#";
11893
11894 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11895 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11896 breakpoints[3] = '\0';
11897 }
11898
11899 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11900 pm_token_buffer_t token_buffer = { 0 };
11901 bool was_line_continuation = false;
11902
11903 while (breakpoint != NULL) {
11904 switch (*breakpoint) {
11905 case '\0':
11906 // Skip directly past the null character.
11907 parser->current.end = breakpoint + 1;
11908 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11909 break;
11910 case '\r':
11911 parser->current.end = breakpoint + 1;
11912
11913 if (peek_at(parser, breakpoint + 1) != '\n') {
11914 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11915 break;
11916 }
11917
11918 // If we hit a \r\n sequence, then we want to replace it
11919 // with a single \n character in the final string.
11920 breakpoint++;
11921 pm_token_buffer_escape(parser, &token_buffer);
11922 token_buffer.cursor = breakpoint;
11923
11925 case '\n': {
11926 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11927 parser_flush_heredoc_end(parser);
11928 parser->current.end = breakpoint + 1;
11929 pm_token_buffer_flush(parser, &token_buffer);
11930 LEX(PM_TOKEN_STRING_CONTENT);
11931 }
11932
11933 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11934
11935 // If we have a - or ~ heredoc, then we can match after
11936 // some leading whitespace.
11937 const uint8_t *start = breakpoint + 1;
11938
11939 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11940 // We want to match the terminator starting from the end of the line in case
11941 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
11942 const uint8_t *newline = next_newline(start, parser->end - start);
11943
11944 if (newline == NULL) {
11945 newline = parser->end;
11946 } else if (newline[-1] == '\r') {
11947 newline--; // Remove \r
11948 }
11949
11950 // Start of a possible terminator.
11951 const uint8_t *terminator_start = newline - ident_length;
11952
11953 // Cursor to check for the leading whitespace. We skip the
11954 // leading whitespace if we have a - or ~ heredoc.
11955 const uint8_t *cursor = start;
11956
11957 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11958 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11959 cursor++;
11960 }
11961 }
11962
11963 if (
11964 cursor == terminator_start &&
11965 (memcmp(terminator_start, ident_start, ident_length) == 0)
11966 ) {
11967 parser->current.end = breakpoint + 1;
11968 pm_token_buffer_flush(parser, &token_buffer);
11969 LEX(PM_TOKEN_STRING_CONTENT);
11970 }
11971 }
11972
11973 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
11974
11975 // If we have hit a newline that is followed by a valid
11976 // terminator, then we need to return the content of the
11977 // heredoc here as string content. Then, the next time a
11978 // token is lexed, it will match again and return the
11979 // end of the heredoc.
11980 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
11981 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
11982 *lex_mode->as.heredoc.common_whitespace = whitespace;
11983 }
11984
11985 parser->current.end = breakpoint + 1;
11986 pm_token_buffer_flush(parser, &token_buffer);
11987 LEX(PM_TOKEN_STRING_CONTENT);
11988 }
11989
11990 // Otherwise we hit a newline and it wasn't followed by
11991 // a terminator, so we can continue parsing.
11992 parser->current.end = breakpoint + 1;
11993 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11994 break;
11995 }
11996 case '\\': {
11997 // If we hit an escape, then we need to skip past
11998 // however many characters the escape takes up. However
11999 // it's important that if \n or \r\n are escaped, we
12000 // stop looping before the newline and not after the
12001 // newline so that we can still potentially find the
12002 // terminator of the heredoc.
12003 parser->current.end = breakpoint + 1;
12004
12005 // If we've hit the end of the file, then break out of
12006 // the loop by setting the breakpoint to NULL.
12007 if (parser->current.end == parser->end) {
12008 breakpoint = NULL;
12009 continue;
12010 }
12011
12012 pm_token_buffer_escape(parser, &token_buffer);
12013 uint8_t peeked = peek(parser);
12014
12015 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12016 switch (peeked) {
12017 case '\r':
12018 parser->current.end++;
12019 if (peek(parser) != '\n') {
12020 pm_token_buffer_push_byte(&token_buffer, '\\');
12021 pm_token_buffer_push_byte(&token_buffer, '\r');
12022 break;
12023 }
12025 case '\n':
12026 pm_token_buffer_push_byte(&token_buffer, '\\');
12027 pm_token_buffer_push_byte(&token_buffer, '\n');
12028 token_buffer.cursor = parser->current.end + 1;
12029 breakpoint = parser->current.end;
12030 continue;
12031 default:
12032 pm_token_buffer_push_byte(&token_buffer, '\\');
12033 pm_token_buffer_push_escaped(&token_buffer, parser);
12034 break;
12035 }
12036 } else {
12037 switch (peeked) {
12038 case '\r':
12039 parser->current.end++;
12040 if (peek(parser) != '\n') {
12041 pm_token_buffer_push_byte(&token_buffer, '\r');
12042 break;
12043 }
12045 case '\n':
12046 // If we are in a tilde here, we should
12047 // break out of the loop and return the
12048 // string content.
12049 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12050 const uint8_t *end = parser->current.end;
12051
12052 if (parser->heredoc_end == NULL) {
12053 pm_line_offset_list_append(&parser->line_offsets, U32(end - parser->start + 1));
12054 }
12055
12056 // Here we want the buffer to only
12057 // include up to the backslash.
12058 parser->current.end = breakpoint;
12059 pm_token_buffer_flush(parser, &token_buffer);
12060
12061 // Now we can advance the end of the
12062 // token past the newline.
12063 parser->current.end = end + 1;
12064 lex_mode->as.heredoc.line_continuation = true;
12065 LEX(PM_TOKEN_STRING_CONTENT);
12066 }
12067
12068 was_line_continuation = true;
12069 token_buffer.cursor = parser->current.end + 1;
12070 breakpoint = parser->current.end;
12071 continue;
12072 default:
12073 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12074 break;
12075 }
12076 }
12077
12078 token_buffer.cursor = parser->current.end;
12079 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12080 break;
12081 }
12082 case '#': {
12083 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12084
12085 if (!type) {
12086 // If we haven't returned at this point then we had
12087 // something that looked like an interpolated class
12088 // or instance variable like "#@" but wasn't
12089 // actually. In this case we'll just skip to the
12090 // next breakpoint.
12091 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12092 break;
12093 }
12094
12095 if (type == PM_TOKEN_STRING_CONTENT) {
12096 pm_token_buffer_flush(parser, &token_buffer);
12097 }
12098
12099 LEX(type);
12100 }
12101 default:
12102 assert(false && "unreachable");
12103 }
12104
12105 was_line_continuation = false;
12106 }
12107
12108 if (parser->current.end > parser->current.start) {
12109 parser->current.end = parser->end;
12110 pm_token_buffer_flush(parser, &token_buffer);
12111 LEX(PM_TOKEN_STRING_CONTENT);
12112 }
12113
12114 // If we've hit the end of the string, then this is an unterminated
12115 // heredoc. In that case we'll return a string content token.
12116 parser->current.end = parser->end;
12117 pm_token_buffer_flush(parser, &token_buffer);
12118 LEX(PM_TOKEN_STRING_CONTENT);
12119 }
12120 }
12121
12122 assert(false && "unreachable");
12123}
12124
12125#undef LEX
12126
12127/******************************************************************************/
12128/* Parse functions */
12129/******************************************************************************/
12130
/**
 * The binding power levels used when parsing expressions. Every level is an
 * even number, which leaves the odd value in between free for use as a
 * "one higher than this level" right binding power.
 *
 * The members are listed in increasing numeric order. Presumably a larger
 * value means the operator binds more tightly -- confirm against the
 * expression parser.
 */
typedef enum {
    /** Indicates that a token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,

    /** The level of a whole statement. */
    PM_BINDING_POWER_STATEMENT = 2,

    /** rescue */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    /** if unless until while */
    PM_BINDING_POWER_MODIFIER = 6,

    /** and or */
    PM_BINDING_POWER_COMPOSITION = 8,

    /** not */
    PM_BINDING_POWER_NOT = 10,

    /** => in */
    PM_BINDING_POWER_MATCH = 12,

    /** defined? */
    PM_BINDING_POWER_DEFINED = 14,

    /** = */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,

    /** ?: */
    PM_BINDING_POWER_TERNARY = 20,

    /** .. ... */
    PM_BINDING_POWER_RANGE = 22,

    /** || */
    PM_BINDING_POWER_LOGICAL_OR = 24,

    /** && */
    PM_BINDING_POWER_LOGICAL_AND = 26,

    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,

    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,

    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,

    /** & */
    PM_BINDING_POWER_BITWISE_AND = 34,

    /** << >> */
    PM_BINDING_POWER_SHIFT = 36,

    /** + - */
    PM_BINDING_POWER_TERM = 38,

    /** * / % */
    PM_BINDING_POWER_FACTOR = 40,

    /** -@ */
    PM_BINDING_POWER_UMINUS = 42,

    /** ** */
    PM_BINDING_POWER_EXPONENT = 44,

    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,

    /** [] []= */
    PM_BINDING_POWER_INDEX = 48,

    /** :: . */
    PM_BINDING_POWER_CALL = 50,

    /** The largest binding power value. */
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12168
12173typedef struct {
12175 pm_binding_power_t left;
12176
12178 pm_binding_power_t right;
12179
12182
12189
// Helpers for writing binding power initializers. Each expands to a
// positional initializer of the form { left, right, flag, flag }. The
// precedence argument is parenthesized everywhere it is used so that passing
// an expression (for example a conditional or a shift) cannot change how
// "precedence + 1" is grouped.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12195
/**
 * The binding powers for each token type, indexed by token type. Only tokens
 * that take part in operator parsing are listed; because designated
 * initializers are used, every other entry is zero-initialized, which leaves
 * its left binding power equal to PM_BINDING_POWER_UNSET.
 *
 * Each entry is a positional initializer: left binding power, right binding
 * power, and two boolean flags.
 */
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
    // rescue
    [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },

    // if unless until while
    [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
    [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
    [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
    [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),

    // and or
    [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
    [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),

    // => in
    [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
    [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),

    // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
    [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,

    // ?:
    [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),

    // .. ... (the unary, beginless forms use the logical-or level)
    [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
    [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
    [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
    [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),

    // ||
    [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),

    // &&
    [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),

    // != !~ == === =~ <=>
    [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
    [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
    [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
    [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
    [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
    [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),

    // > >= < <=
    [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
    [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
    [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
    [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),

    // ^ |
    [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
    [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),

    // &
    [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),

    // >> <<
    [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
    [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),

    // - +
    [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
    [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),

    // % / * (and the unary splat star)
    [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
    [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
    [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
    [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),

    // -@
    [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
    [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },

    // ** (and the unary double splat)
    [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
    [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),

    // ! ~ +@
    [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
    [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
    [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),

    // [
    [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),

    // :: . &.
    [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
    [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
    [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
};
12301
// The initializer helpers are only meant for the binding power table, so
// every one of them is removed here, including NON_ASSOCIATIVE, which would
// otherwise stay defined for the rest of the file.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
12306
12310static inline bool
12311match1(const pm_parser_t *parser, pm_token_type_t type) {
12312 return parser->current.type == type;
12313}
12314
12318static inline bool
12319match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12320 return match1(parser, type1) || match1(parser, type2);
12321}
12322
12326static inline bool
12327match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12328 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12329}
12330
12334static inline bool
12335match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12336 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12337}
12338
12342static inline bool
12343match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12344 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12345}
12346
12350static inline bool
12351match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12352 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12353}
12354
12361static bool
12362accept1(pm_parser_t *parser, pm_token_type_t type) {
12363 if (match1(parser, type)) {
12364 parser_lex(parser);
12365 return true;
12366 }
12367 return false;
12368}
12369
12374static inline bool
12375accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12376 if (match2(parser, type1, type2)) {
12377 parser_lex(parser);
12378 return true;
12379 }
12380 return false;
12381}
12382
12394static void
12395expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12396 if (accept1(parser, type)) return;
12397
12398 const uint8_t *location = parser->previous.end;
12399 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12400
12401 parser->previous.start = location;
12402 parser->previous.type = 0;
12403}
12404
12409static void
12410expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12411 if (accept2(parser, type1, type2)) return;
12412
12413 const uint8_t *location = parser->previous.end;
12414 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12415
12416 parser->previous.start = location;
12417 parser->previous.type = 0;
12418}
12419
12424static void
12425expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12426 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12427 parser_lex(parser);
12428 } else {
12429 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12430 parser->previous.start = parser->previous.end;
12431 parser->previous.type = 0;
12432 }
12433}
12434
12441static void
12442expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12443 if (accept1(parser, type)) return;
12444
12445 const uint8_t *start = opening->start;
12446 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12447
12448 parser->previous.start = parser->previous.end;
12449 parser->previous.type = 0;
12450}
12451
12452static pm_node_t *
12453parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12454
12459static pm_node_t *
12460parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12461 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12462 pm_assert_value_expression(parser, node);
12463 return node;
12464}
12465
12484static inline bool
12485token_begins_expression_p(pm_token_type_t type) {
12486 switch (type) {
12487 case PM_TOKEN_EQUAL_GREATER:
12488 case PM_TOKEN_KEYWORD_IN:
12489 // We need to special case this because it is a binary operator that
12490 // should not be marked as beginning an expression.
12491 return false;
12492 case PM_TOKEN_BRACE_RIGHT:
12493 case PM_TOKEN_BRACKET_RIGHT:
12494 case PM_TOKEN_COLON:
12495 case PM_TOKEN_COMMA:
12496 case PM_TOKEN_EMBEXPR_END:
12497 case PM_TOKEN_EOF:
12498 case PM_TOKEN_LAMBDA_BEGIN:
12499 case PM_TOKEN_KEYWORD_DO:
12500 case PM_TOKEN_KEYWORD_DO_LOOP:
12501 case PM_TOKEN_KEYWORD_END:
12502 case PM_TOKEN_KEYWORD_ELSE:
12503 case PM_TOKEN_KEYWORD_ELSIF:
12504 case PM_TOKEN_KEYWORD_ENSURE:
12505 case PM_TOKEN_KEYWORD_THEN:
12506 case PM_TOKEN_KEYWORD_RESCUE:
12507 case PM_TOKEN_KEYWORD_WHEN:
12508 case PM_TOKEN_NEWLINE:
12509 case PM_TOKEN_PARENTHESIS_RIGHT:
12510 case PM_TOKEN_SEMICOLON:
12511 // The reason we need this short-circuit is because we're using the
12512 // binding powers table to tell us if the subsequent token could
12513 // potentially be the start of an expression. If there _is_ a binding
12514 // power for one of these tokens, then we should remove it from this list
12515 // and let it be handled by the default case below.
12516 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12517 return false;
12518 case PM_TOKEN_UAMPERSAND:
12519 // This is a special case because this unary operator cannot appear
12520 // as a general operator, it only appears in certain circumstances.
12521 return false;
12522 case PM_TOKEN_UCOLON_COLON:
12523 case PM_TOKEN_UMINUS:
12524 case PM_TOKEN_UMINUS_NUM:
12525 case PM_TOKEN_UPLUS:
12526 case PM_TOKEN_BANG:
12527 case PM_TOKEN_TILDE:
12528 case PM_TOKEN_UDOT_DOT:
12529 case PM_TOKEN_UDOT_DOT_DOT:
12530 // These unary tokens actually do have binding power associated with them
12531 // so that we can correctly place them into the precedence order. But we
12532 // want them to be marked as beginning an expression, so we need to
12533 // special case them here.
12534 return true;
12535 default:
12536 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12537 }
12538}
12539
12544static pm_node_t *
12545parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12546 if (accept1(parser, PM_TOKEN_USTAR)) {
12547 pm_token_t operator = parser->previous;
12548 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12549 return UP(pm_splat_node_create(parser, &operator, expression));
12550 }
12551
12552 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12553}
12554
12555static bool
12556pm_node_unreference_each(const pm_node_t *node, void *data) {
12557 switch (PM_NODE_TYPE(node)) {
12558 /* When we are about to destroy a set of nodes that could potentially
12559 * contain block exits for the current scope, we need to check if they
12560 * are contained in the list of block exits and remove them if they are.
12561 */
12562 case PM_BREAK_NODE:
12563 case PM_NEXT_NODE:
12564 case PM_REDO_NODE: {
12565 pm_parser_t *parser = (pm_parser_t *) data;
12566 size_t index = 0;
12567
12568 while (index < parser->current_block_exits->size) {
12569 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12570
12571 if (block_exit == node) {
12572 if (index + 1 < parser->current_block_exits->size) {
12573 memmove(
12574 &parser->current_block_exits->nodes[index],
12575 &parser->current_block_exits->nodes[index + 1],
12576 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12577 );
12578 }
12579 parser->current_block_exits->size--;
12580
12581 /* Note returning true here because these nodes could have
12582 * arguments that are themselves block exits. */
12583 return true;
12584 }
12585
12586 index++;
12587 }
12588
12589 return true;
12590 }
12591 /* When an implicit local variable is written to or targeted, it becomes
12592 * a regular, named local variable. This branch removes it from the list
12593 * of implicit parameters when that happens. */
12594 case PM_LOCAL_VARIABLE_READ_NODE:
12595 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12596 pm_parser_t *parser = (pm_parser_t *) data;
12597 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12598
12599 for (size_t index = 0; index < implicit_parameters->size; index++) {
12600 if (implicit_parameters->nodes[index] == node) {
12601 /* If the node is not the last one in the list, we need to
12602 * shift the remaining nodes down to fill the gap. This is
12603 * extremely unlikely to happen. */
12604 if (index != implicit_parameters->size - 1) {
12605 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12606 }
12607
12608 implicit_parameters->size--;
12609 break;
12610 }
12611 }
12612
12613 return false;
12614 }
12615 default:
12616 return true;
12617 }
12618}
12619
12625static void
12626pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12627 pm_visit_node(node, pm_node_unreference_each, parser);
12628}
12629
12634static void
12635parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12636 // The method name needs to change. If we previously had
12637 // foo, we now need foo=. In this case we'll allocate a new
12638 // owned string, copy the previous method name in, and
12639 // append an =.
12640 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12641 size_t length = constant->length;
12642 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12643 if (name == NULL) return;
12644
12645 memcpy(name, constant->start, length);
12646 name[length] = '=';
12647
12648 // Now switch the name to the new string.
12649 // This silences clang analyzer warning about leak of memory pointed by `name`.
12650 // NOLINTNEXTLINE(clang-analyzer-*)
12651 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12652}
12653
12660static pm_node_t *
12661parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12662 switch (PM_NODE_TYPE(target)) {
12663 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12664 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12665 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12666 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12667 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12668 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12669 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12670 default: break;
12671 }
12672
12673 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12674 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12675
12676 return UP(result);
12677}
12678
12687static pm_node_t *
12688parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12689 switch (PM_NODE_TYPE(target)) {
12690 case PM_MISSING_NODE:
12691 return target;
12692 case PM_SOURCE_ENCODING_NODE:
12693 case PM_FALSE_NODE:
12694 case PM_SOURCE_FILE_NODE:
12695 case PM_SOURCE_LINE_NODE:
12696 case PM_NIL_NODE:
12697 case PM_SELF_NODE:
12698 case PM_TRUE_NODE: {
12699 // In these special cases, we have specific error messages and we
12700 // will replace them with local variable writes.
12701 return parse_unwriteable_target(parser, target);
12702 }
12703 case PM_CLASS_VARIABLE_READ_NODE:
12705 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12706 return target;
12707 case PM_CONSTANT_PATH_NODE:
12708 if (context_def_p(parser)) {
12709 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12710 }
12711
12713 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12714
12715 return target;
12716 case PM_CONSTANT_READ_NODE:
12717 if (context_def_p(parser)) {
12718 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12719 }
12720
12721 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12722 target->type = PM_CONSTANT_TARGET_NODE;
12723
12724 return target;
12725 case PM_BACK_REFERENCE_READ_NODE:
12726 case PM_NUMBERED_REFERENCE_READ_NODE:
12727 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12728 return target;
12729 case PM_GLOBAL_VARIABLE_READ_NODE:
12731 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12732 return target;
12733 case PM_LOCAL_VARIABLE_READ_NODE: {
12734 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12735 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
12736 pm_node_unreference(parser, target);
12737 }
12738
12739 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12740 uint32_t name = cast->name;
12741 uint32_t depth = cast->depth;
12742 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12743
12745 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12746
12747 return target;
12748 }
12749 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12750 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12751 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12752
12753 pm_node_unreference(parser, target);
12754
12755 return node;
12756 }
12757 case PM_INSTANCE_VARIABLE_READ_NODE:
12759 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12760 return target;
12761 case PM_MULTI_TARGET_NODE:
12762 if (splat_parent) {
12763 // Multi target is not accepted in all positions. If this is one
12764 // of them, then we need to add an error.
12765 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12766 }
12767
12768 return target;
12769 case PM_SPLAT_NODE: {
12770 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12771
12772 if (splat->expression != NULL) {
12773 splat->expression = parse_target(parser, splat->expression, multiple, true);
12774 }
12775
12776 return UP(splat);
12777 }
12778 case PM_CALL_NODE: {
12779 pm_call_node_t *call = (pm_call_node_t *) target;
12780
12781 // If we have no arguments to the call node and we need this to be a
12782 // target then this is either a method call or a local variable
12783 // write.
12784 if (
12785 (call->message_loc.length > 0) &&
12786 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
12787 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
12788 (call->opening_loc.length == 0) &&
12789 (call->arguments == NULL) &&
12790 (call->block == NULL)
12791 ) {
12792 if (call->receiver == NULL) {
12793 // When we get here, we have a local variable write, because it
12794 // was previously marked as a method call but now we have an =.
12795 // This looks like:
12796 //
12797 // foo = 1
12798 //
12799 // When it was parsed in the prefix position, foo was seen as a
12800 // method call with no receiver and no arguments. Now we have an
12801 // =, so we know it's a local variable write.
12802 pm_location_t message_loc = call->message_loc;
12803 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
12804
12805 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12806 }
12807
12808 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
12809 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12810 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12811 }
12812
12813 parse_write_name(parser, &call->name);
12814 return UP(pm_call_target_node_create(parser, call));
12815 }
12816 }
12817
12818 // If there is no call operator and the message is "[]" then this is
12819 // an aref expression, and we can transform it into an aset
12820 // expression.
12821 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12822 return UP(pm_index_target_node_create(parser, call));
12823 }
12824 }
12826 default:
12827 // In this case we have a node that we don't know how to convert
12828 // into a target. We need to treat it as an error. For now, we'll
12829 // mark it as an error and just skip right past it.
12830 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12831 return target;
12832 }
12833}
12834
12839static pm_node_t *
12840parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12841 pm_node_t *result = parse_target(parser, target, multiple, false);
12842
12843 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12844 // parens after the targets.
12845 if (
12846 !match1(parser, PM_TOKEN_EQUAL) &&
12847 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12848 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12849 ) {
12850 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12851 }
12852
12853 return result;
12854}
12855
12860static pm_node_t *
12861parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12862 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12863
12864 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12865 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12866 }
12867
12868 return write;
12869}
12870
12874static pm_node_t *
12875parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12876 switch (PM_NODE_TYPE(target)) {
12877 case PM_MISSING_NODE:
12878 return target;
12879 case PM_CLASS_VARIABLE_READ_NODE: {
12880 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12881 return UP(node);
12882 }
12883 case PM_CONSTANT_PATH_NODE: {
12884 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12885
12886 if (context_def_p(parser)) {
12887 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12888 }
12889
12890 return parse_shareable_constant_write(parser, node);
12891 }
12892 case PM_CONSTANT_READ_NODE: {
12893 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12894
12895 if (context_def_p(parser)) {
12896 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12897 }
12898
12899 return parse_shareable_constant_write(parser, node);
12900 }
12901 case PM_BACK_REFERENCE_READ_NODE:
12902 case PM_NUMBERED_REFERENCE_READ_NODE:
12903 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12905 case PM_GLOBAL_VARIABLE_READ_NODE: {
12906 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12907 return UP(node);
12908 }
12909 case PM_LOCAL_VARIABLE_READ_NODE: {
12911
12912 pm_location_t location = target->location;
12913 pm_constant_id_t name = local_read->name;
12914 uint32_t depth = local_read->depth;
12915 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12916
12917 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12918 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12919 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
12920 pm_node_unreference(parser, target);
12921 }
12922
12923 pm_locals_unread(&scope->locals, name);
12924
12925 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
12926 }
12927 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12928 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12929 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12930
12931 pm_node_unreference(parser, target);
12932
12933 return node;
12934 }
12935 case PM_INSTANCE_VARIABLE_READ_NODE: {
12936 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
12937 return write_node;
12938 }
12939 case PM_MULTI_TARGET_NODE:
12940 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
12941 case PM_SPLAT_NODE: {
12942 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12943
12944 if (splat->expression != NULL) {
12945 splat->expression = parse_write(parser, splat->expression, operator, value);
12946 }
12947
12948 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
12949 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12950
12951 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
12952 }
12953 case PM_CALL_NODE: {
12954 pm_call_node_t *call = (pm_call_node_t *) target;
12955
12956 // If we have no arguments to the call node and we need this to be a
12957 // target then this is either a method call or a local variable
12958 // write.
12959 if (
12960 (call->message_loc.length > 0) &&
12961 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
12962 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
12963 (call->opening_loc.length == 0) &&
12964 (call->arguments == NULL) &&
12965 (call->block == NULL)
12966 ) {
12967 if (call->receiver == NULL) {
12968 // When we get here, we have a local variable write, because it
12969 // was previously marked as a method call but now we have an =.
12970 // This looks like:
12971 //
12972 // foo = 1
12973 //
12974 // When it was parsed in the prefix position, foo was seen as a
12975 // method call with no receiver and no arguments. Now we have an
12976 // =, so we know it's a local variable write.
12977 pm_location_t message_loc = call->message_loc;
12978
12979 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
12980 pm_parser_local_add_location(parser, &message_loc, 0);
12981
12982 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
12983 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
12984
12985 return target;
12986 }
12987
12988 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
12989 // When we get here, we have a method call, because it was
12990 // previously marked as a method call but now we have an =. This
12991 // looks like:
12992 //
12993 // foo.bar = 1
12994 //
12995 // When it was parsed in the prefix position, foo.bar was seen as a
12996 // method call with no arguments. Now we have an =, so we know it's
12997 // a method call with an argument. In this case we will create the
12998 // arguments node, parse the argument, and add it to the list.
12999 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13000 call->arguments = arguments;
13001
13002 pm_arguments_node_arguments_append(parser->arena, arguments, value);
13003 PM_NODE_LENGTH_SET_NODE(call, arguments);
13004 call->equal_loc = TOK2LOC(parser, operator);
13005
13006 parse_write_name(parser, &call->name);
13007 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13008
13009 return UP(call);
13010 }
13011 }
13012
13013 // If there is no call operator and the message is "[]" then this is
13014 // an aref expression, and we can transform it into an aset
13015 // expression.
13016 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13017 if (call->arguments == NULL) {
13018 call->arguments = pm_arguments_node_create(parser);
13019 }
13020
13021 pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
13022 PM_NODE_LENGTH_SET_NODE(target, value);
13023
13024 // Replace the name with "[]=".
13025 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13026 call->equal_loc = TOK2LOC(parser, operator);
13027
13028 // Ensure that the arguments for []= don't contain keywords
13029 pm_index_arguments_check(parser, call->arguments, call->block);
13030 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13031
13032 return target;
13033 }
13034
13035 // If there are arguments on the call node, then it can't be a
13036 // method call ending with = or a local variable write, so it must
13037 // be a syntax error. In this case we'll fall through to our default
13038 // handling. We need to free the value that we parsed because there
13039 // is no way for us to attach it to the tree at this point.
13040 //
13041 // Since it is possible for the value to contain an implicit
13042 // parameter somewhere in its subtree, we need to walk it and remove
13043 // any implicit parameters from the list of implicit parameters for
13044 // the current scope.
13045 pm_node_unreference(parser, value);
13046 }
13048 default:
13049 // In this case we have a node that we don't know how to convert into a
13050 // target. We need to treat it as an error. For now, we'll mark it as an
13051 // error and just skip right past it.
13052 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13053 return target;
13054 }
13055}
13056
13063static pm_node_t *
13064parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13065 switch (PM_NODE_TYPE(target)) {
13066 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13067 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13068 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13069 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13070 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13071 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13072 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13073 default: break;
13074 }
13075
13076 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
13077 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13078
13079 return UP(result);
13080}
13081
13092static pm_node_t *
13093parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13094 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13095
13096 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13097 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13098
13099 while (accept1(parser, PM_TOKEN_COMMA)) {
13100 if (accept1(parser, PM_TOKEN_USTAR)) {
13101 // Here we have a splat operator. It can have a name or be
13102 // anonymous. It can be the final target or be in the middle if
13103 // there haven't been any others yet.
13104 if (has_rest) {
13105 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13106 }
13107
13108 pm_token_t star_operator = parser->previous;
13109 pm_node_t *name = NULL;
13110
13111 if (token_begins_expression_p(parser->current.type)) {
13112 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13113 name = parse_target(parser, name, true, true);
13114 }
13115
13116 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13117 pm_multi_target_node_targets_append(parser, result, splat);
13118 has_rest = true;
13119 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13120 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13121 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13122 target = parse_target(parser, target, true, false);
13123
13124 pm_multi_target_node_targets_append(parser, result, target);
13125 context_pop(parser);
13126 } else if (token_begins_expression_p(parser->current.type)) {
13127 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13128 target = parse_target(parser, target, true, false);
13129
13130 pm_multi_target_node_targets_append(parser, result, target);
13131 } else if (!match1(parser, PM_TOKEN_EOF)) {
13132 // If we get here, then we have a trailing , in a multi target node.
13133 // We'll add an implicit rest node to represent this.
13134 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13135 pm_multi_target_node_targets_append(parser, result, rest);
13136 break;
13137 }
13138 }
13139
13140 return UP(result);
13141}
13142
13147static pm_node_t *
13148parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13149 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13150 accept1(parser, PM_TOKEN_NEWLINE);
13151
13152 // Ensure that we have either an = or a ) after the targets.
13153 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13154 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13155 }
13156
13157 return result;
13158}
13159
/**
 * Parse a list of statements that belongs to the given context.
 *
 * Leading semicolons and newlines are skipped first. If the current token
 * then terminates the context, NULL is returned and no statements node is
 * created. Otherwise the context is pushed, expressions are parsed at
 * PM_BINDING_POWER_STATEMENT and appended to a new statements node until a
 * terminator for the context is reached or the parser is recovering, the
 * context is popped, and the statements are run through
 * pm_void_statements_check before being returned.
 *
 * parser: the parser that is consuming tokens.
 * context: the context whose terminators end this list of statements.
 * depth: the current nesting depth; each nested parse receives depth + 1.
 */
13163static pm_statements_node_t *
13164parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13165 // First, skip past any optional terminators that might be at the beginning
13166 // of the statements.
13167 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13168
13169 // If we have a terminator, then we can just return NULL.
13170 if (context_terminator(context, &parser->current)) return NULL;
13171
13172 pm_statements_node_t *statements = pm_statements_node_create(parser);
13173
13174 // At this point we know we have at least one statement, and that it
13175 // immediately follows the current token.
13176 context_push(parser, context);
13177
13178 while (true) {
13179 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13180 pm_statements_node_body_append(parser, statements, node, true);
13181
13182 // If we're recovering from a syntax error, then we need to stop parsing
13183 // the statements now.
13184 if (parser->recovering) {
13185 // If this is the level of context where the recovery has happened,
13186 // then we can mark the parser as done recovering.
13187 if (context_terminator(context, &parser->current)) parser->recovering = false;
13188 break;
13189 }
13190
13191 // If we have a terminator, then we will parse all consecutive
13192 // terminators and then continue parsing the statements list.
13193 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13194 // If we have a terminator, then we will continue parsing the
13195 // statements list.
13196 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13197 if (context_terminator(context, &parser->current)) break;
13198
13199 // Now we can continue parsing the list of statements.
13200 continue;
13201 }
13202
13203 // At this point we have a list of statements that are not terminated by
13204 // a newline or semicolon. At this point we need to check if we're at
13205 // the end of the statements list. If we are, then we should break out
13206 // of the loop.
13207 if (context_terminator(context, &parser->current)) break;
13208
13209 // At this point, we have a syntax error, because the statement was not
13210 // terminated by a newline or semicolon, and we're not at the end of the
13211 // statements list. Ideally we should scan forward to determine if we
13212 // should insert a missing terminator or break out of parsing the
13213 // statements list at this point.
13214 //
13215 // We don't have that yet, so instead we'll do a more naive approach. If
13216 // we were unable to parse an expression, then we will skip past this
13217 // token and continue parsing the statements list. Otherwise we'll add
13218 // an error and continue parsing the statements list.
13219 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13220 parser_lex(parser);
13221
13222 // If we are at the end of the file, then we need to stop parsing
13223 // the statements entirely at this point. Mark the parser as
13224 // recovering, as we know that EOF closes the top-level context, and
13225 // then break out of the loop.
13226 if (match1(parser, PM_TOKEN_EOF)) {
13227 parser->recovering = true;
13228 break;
13229 }
13230
13231 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13232 if (context_terminator(context, &parser->current)) break;
13233 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13234 // This is an inlined version of accept1 because the error that we
13235 // want to add has varargs. If this happens again, we should
13236 // probably extract a helper function.
13237 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
      // Collapse `previous` into an empty token of type 0 that sits at its
      // own end, since no terminator token was actually consumed here.
13238 parser->previous.start = parser->previous.end;
13239 parser->previous.type = 0;
13240 }
13241 }
13242
13243 context_pop(parser);
      // last_value is handed to pm_void_statements_check below.
      // NOTE(review): in this listing no `case` label precedes the assignment
      // inside the switch, so as shown it can never execute and last_value
      // stays true. The embedded numbering skips 13246-13247, so the labels
      // were presumably lost in extraction; confirm against the upstream file.
13244 bool last_value = true;
13245 switch (context) {
13248 last_value = false;
13249 break;
13250 default:
13251 break;
13252 }
13253 pm_void_statements_check(parser, statements, last_value);
13254
13255 return statements;
13256}
13257
13262static void
13263pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13264 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13265
13266 if (duplicated != NULL) {
13267 pm_buffer_t buffer = { 0 };
13268 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13269
13270 pm_diagnostic_list_append_format(
13271 &parser->warning_list,
13272 duplicated->location.start,
13273 duplicated->location.length,
13274 PM_WARN_DUPLICATED_HASH_KEY,
13275 (int) pm_buffer_length(&buffer),
13276 pm_buffer_value(&buffer),
13277 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13278 );
13279
13280 pm_buffer_free(&buffer);
13281 }
13282}
13283
13288static void
13289pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13290 pm_node_t *previous;
13291
13292 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13293 pm_diagnostic_list_append_format(
13294 &parser->warning_list,
13295 PM_NODE_START(node),
13296 PM_NODE_LENGTH(node),
13297 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13298 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13299 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13300 );
13301 }
13302}
13303
13307static bool
13308parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13309 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13310 bool contains_keyword_splat = false;
13311
13312 while (true) {
13313 pm_node_t *element;
13314
13315 switch (parser->current.type) {
13316 case PM_TOKEN_USTAR_STAR: {
13317 parser_lex(parser);
13318 pm_token_t operator = parser->previous;
13319 pm_node_t *value = NULL;
13320
13321 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13322 // If we're about to parse a nested hash that is being
13323 // pushed into this hash directly with **, then we want the
13324 // inner hash to share the static literals with the outer
13325 // hash.
13326 parser->current_hash_keys = literals;
13327 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13328 } else if (token_begins_expression_p(parser->current.type)) {
13329 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13330 } else {
13331 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13332 }
13333
13334 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13335 contains_keyword_splat = true;
13336 break;
13337 }
13338 case PM_TOKEN_LABEL: {
13339 pm_token_t label = parser->current;
13340 parser_lex(parser);
13341
13342 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13343 pm_hash_key_static_literals_add(parser, literals, key);
13344
13345 pm_node_t *value = NULL;
13346
13347 if (token_begins_expression_p(parser->current.type)) {
13348 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13349 } else {
13350 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13351 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13352 value = UP(pm_constant_read_node_create(parser, &constant));
13353 } else {
13354 int depth = -1;
13355 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13356
13357 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13358 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13359 } else {
13360 depth = pm_parser_local_depth(parser, &identifier);
13361 }
13362
13363 if (depth == -1) {
13364 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13365 } else {
13366 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13367 }
13368 }
13369
13370 value->location.length++;
13371 value = UP(pm_implicit_node_create(parser, value));
13372 }
13373
13374 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13375 break;
13376 }
13377 default: {
13378 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13379
13380 // Hash keys that are strings are automatically frozen. We will
13381 // mark that here.
13382 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13383 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13384 }
13385
13386 pm_hash_key_static_literals_add(parser, literals, key);
13387
13388 pm_token_t operator = { 0 };
13389 if (!pm_symbol_node_label_p(parser, key)) {
13390 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13391 operator = parser->previous;
13392 }
13393
13394 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13395 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13396 break;
13397 }
13398 }
13399
13400 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13401 pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
13402 } else {
13403 pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
13404 }
13405
13406 // If there's no comma after the element, then we're done.
13407 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13408
13409 // If the next element starts with a label or a **, then we know we have
13410 // another element in the hash, so we'll continue parsing.
13411 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13412
13413 // Otherwise we need to check if the subsequent token begins an expression.
13414 // If it does, then we'll continue parsing.
13415 if (token_begins_expression_p(parser->current.type)) continue;
13416
13417 // Otherwise by default we will exit out of this loop.
13418 break;
13419 }
13420
13421 return contains_keyword_splat;
13422}
13423
13424static inline bool
13425argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13426 if (pm_symbol_node_label_p(parser, argument)) {
13427 return true;
13428 }
13429
13430 switch (PM_NODE_TYPE(argument)) {
13431 case PM_CALL_NODE: {
13432 pm_call_node_t *cast = (pm_call_node_t *) argument;
13433 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13434 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13435 return false;
13436 }
13437 if (cast->block != NULL) {
13438 return false;
13439 }
13440 }
13441 break;
13442 }
13443 default: break;
13444 }
13445 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13446}
13447
13451static inline void
13452parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13453 if (arguments->arguments == NULL) {
13454 arguments->arguments = pm_arguments_node_create(parser);
13455 }
13456
13457 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
13458}
13459
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Nothing is parsed if the current token is the terminator or EOF, if its
 * left binding power is set and lower than PM_BINDING_POWER_RANGE, or if it
 * terminates the current context. Otherwise arguments are parsed one at a
 * time, dispatching on the current token: `**` and labels start a keyword
 * hash, `&` a block argument, `*` a splat, `...` either argument forwarding
 * or a beginless range, and anything else a value expression that may turn
 * out to be the first key of a bare hash.
 *
 * parser: the parser that is consuming tokens.
 * arguments: receives the parsed arguments, the block argument, and the
 *     has_forwarding marker.
 * accepts_forwarding: whether a leading `...` is handled specially here
 *     instead of being parsed as an ordinary expression.
 * terminator: the token type that closes the list. PM_TOKEN_EOF means that
 *     no specific closing token is expected.
 * depth: the current nesting depth; each nested parse receives depth + 1.
 */
13463static void
13464parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
13465 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13466
13467 // First we need to check if the next token is one that could be the start
13468 // of an argument. If it's not, then we can just return.
13469 if (
13470 match2(parser, terminator, PM_TOKEN_EOF) ||
13471 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13472 context_terminator(parser->current_context->context, &parser->current)
13473 ) {
13474 return;
13475 }
13476
      // State carried across iterations: whether any argument has been parsed,
      // and whether a bare hash, a block argument, or forwarding arguments
      // have been seen so far.
13477 bool parsed_first_argument = false;
13478 bool parsed_bare_hash = false;
13479 bool parsed_block_argument = false;
13480 bool parsed_forwarding_arguments = false;
13481
13482 while (!match1(parser, PM_TOKEN_EOF)) {
13483 if (parsed_forwarding_arguments) {
13484 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13485 }
13486
13487 pm_node_t *argument = NULL;
13488
13489 switch (parser->current.type) {
13490 case PM_TOKEN_USTAR_STAR:
13491 case PM_TOKEN_LABEL: {
      // A second bare hash in the same argument list is reported as an error,
      // but it is still parsed and appended.
13492 if (parsed_bare_hash) {
13493 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13494 }
13495
13496 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13497 argument = UP(hash);
13498
13499 pm_static_literals_t hash_keys = { 0 };
13500 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13501
13502 parse_arguments_append(parser, arguments, argument);
13503
13504 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13505 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13506 pm_node_flag_set(UP(arguments->arguments), flags);
13507
13508 pm_static_literals_free(&hash_keys);
13509 parsed_bare_hash = true;
13510
13511 break;
13512 }
13513 case PM_TOKEN_UAMPERSAND: {
13514 parser_lex(parser);
13515 pm_token_t operator = parser->previous;
13516 pm_node_t *expression = NULL;
13517
13518 if (token_begins_expression_p(parser->current.type)) {
13519 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13520 } else {
13521 pm_parser_scope_forwarding_block_check(parser, &operator);
13522 }
13523
13524 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
      // Only the first block argument is stored in the block slot. Any later
      // one is appended to the regular argument list instead.
13525 if (parsed_block_argument) {
13526 parse_arguments_append(parser, arguments, argument);
13527 } else {
13528 arguments->block = argument;
13529 }
13530
13531 if (match1(parser, PM_TOKEN_COMMA)) {
13532 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13533 }
13534
13535 parsed_block_argument = true;
13536 break;
13537 }
13538 case PM_TOKEN_USTAR: {
13539 parser_lex(parser);
13540 pm_token_t operator = parser->previous;
13541
      // A * that is directly followed by one of these closing or separating
      // tokens has no operand, so it becomes an anonymous splat.
13542 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13543 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13544 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13545 if (parsed_bare_hash) {
13546 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13547 }
13548 } else {
13549 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13550
13551 if (parsed_bare_hash) {
13552 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13553 }
13554
13555 argument = UP(pm_splat_node_create(parser, &operator, expression));
13556 }
13557
13558 parse_arguments_append(parser, arguments, argument);
13559 break;
13560 }
13561 case PM_TOKEN_UDOT_DOT_DOT: {
13562 if (accepts_forwarding) {
13563 parser_lex(parser);
13564
13565 if (token_begins_expression_p(parser->current.type)) {
13566 // If the token begins an expression then this ... was
13567 // not actually argument forwarding but was instead a
13568 // range.
13569 pm_token_t operator = parser->previous;
13570 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13571
13572 // If we parse a range, we need to validate that we
13573 // didn't accidentally violate the nonassoc rules of the
13574 // ... operator.
13575 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13576 pm_range_node_t *range = (pm_range_node_t *) right;
13577 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13578 }
13579
13580 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13581 } else {
13582 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13583 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13584 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13585 }
13586
13587 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13588 parse_arguments_append(parser, arguments, argument);
13589 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13590 arguments->has_forwarding = true;
13591 parsed_forwarding_arguments = true;
13592 break;
13593 }
13594 }
13595 }
      // Fallthrough: there is no break on this path. Control reaches the
      // default case below either with `argument` still NULL (forwarding is
      // not accepted, so the `...` is parsed as an ordinary expression) or
      // with `argument` already holding the range that was built above.
      // NOTE(review): the embedded numbering skips 13596 here, presumably an
      // explicit fallthrough marker lost in extraction; confirm upstream.
13597 default: {
13598 if (argument == NULL) {
13599 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13600 }
13601
13602 bool contains_keywords = false;
13603 bool contains_keyword_splat = false;
13604
13605 if (argument_allowed_for_bare_hash(parser, argument)) {
13606 if (parsed_bare_hash) {
13607 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13608 }
13609
      // The operator stays zeroed unless the previous token is a =>, which
      // is not the case when the key is a symbol label.
13610 pm_token_t operator = { 0 };
13611 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13612 operator = parser->previous;
13613 }
13614
13615 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13616 contains_keywords = true;
13617
13618 // Create the set of static literals for this hash.
13619 pm_static_literals_t hash_keys = { 0 };
13620 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13621
13622 // Finish parsing the one we are part way through.
13623 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13624 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13625
13626 pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
13627 argument = UP(bare_hash);
13628
13629 // Then parse more if we have a comma
13630 if (accept1(parser, PM_TOKEN_COMMA) && (
13631 token_begins_expression_p(parser->current.type) ||
13632 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13633 )) {
13634 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13635 }
13636
13637 pm_static_literals_free(&hash_keys);
13638 parsed_bare_hash = true;
13639 }
13640
13641 parse_arguments_append(parser, arguments, argument);
13642
13643 pm_node_flags_t flags = 0;
13644 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13645 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13646 pm_node_flag_set(UP(arguments->arguments), flags);
13647
13648 break;
13649 }
13650 }
13651
13652 parsed_first_argument = true;
13653
13654 // If parsing the argument failed, we need to stop parsing arguments.
13655 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13656
13657 // If the terminator of these arguments is not EOF, then we have a
13658 // specific token we're looking for. In that case we can accept a
13659 // newline here because it is not functioning as a statement terminator.
13660 bool accepted_newline = false;
13661 if (terminator != PM_TOKEN_EOF) {
13662 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13663 }
13664
13665 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13666 // If we previously were on a comma and we just parsed a bare hash,
13667 // then we want to continue parsing arguments. This is because the
13668 // comma was grabbed up by the hash parser.
13669 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13670 // If there was a comma, then we need to check if we also accepted a
13671 // newline. If we did, then this is a syntax error.
13672 if (accepted_newline) {
13673 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13674 }
13675
13676 // If this is a command call and an argument takes a block,
13677 // there can be no further arguments. For example,
13678 // `foo(bar 1 do end, 2)` should be rejected.
13679 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13680 pm_call_node_t *call = (pm_call_node_t *) argument;
13681 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13682 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13683 break;
13684 }
13685 }
13686 } else {
13687 // If there is no comma at the end of the argument list then we're
13688 // done parsing arguments and can break out of this loop.
13689 break;
13690 }
13691
13692 // If we hit the terminator, then that means we have a trailing comma so
13693 // we can accept that output as well.
13694 if (match1(parser, terminator)) break;
13695 }
13696}
13697
/**
 * Parse a parenthesized, destructured required parameter such as `(a, (b, c), *d)`.
 *
 * An opening parenthesis is expected first. Each entry is then either a
 * nested parenthesized group (parsed recursively), a `*` with an optional
 * identifier, or a plain identifier. Identifiers are checked with
 * pm_parser_parameter_name_check, flagged as repeated when that check says
 * so, and added to the parser's locals. The closing parenthesis is expected
 * at the end, after an optional newline.
 *
 * parser: the parser that is consuming tokens.
 * Returns the multi target node that holds the parsed entries.
 *
 * NOTE(review): the return-type line of this definition is not present in
 * this listing (the embedded numbering jumps to 13709). The function returns
 * `node`, which is declared as a pm_multi_target_node_t pointer; confirm the
 * full declaration against the upstream file.
 */
13709parse_required_destructured_parameter(pm_parser_t *parser) {
13710 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13711
13712 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13713 pm_multi_target_node_opening_set(parser, node, &parser->previous);
13714
13715 do {
13716 pm_node_t *param;
13717
13718 // If we get here then we have a trailing comma, which isn't allowed in
13719 // the grammar. In other places, multi targets _do_ allow trailing
13720 // commas, so here we'll assume this is a mistake of the user not
13721 // knowing it's not allowed here.
13722 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13723 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13724 pm_multi_target_node_targets_append(parser, node, param);
13725 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13726 break;
13727 }
13728
13729 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
      // A nested group: recurse. The recursive call consumes the opening
      // parenthesis itself, which is why match1 is used here, not accept1.
13730 param = UP(parse_required_destructured_parameter(parser));
13731 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13732 pm_token_t star = parser->previous;
13733 pm_node_t *value = NULL;
13734
      // The name after * is optional; without one, the splat is created with
      // a NULL expression.
13735 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13736 pm_token_t name = parser->previous;
13737 value = UP(pm_required_parameter_node_create(parser, &name));
13738 if (pm_parser_parameter_name_check(parser, &name)) {
13739 pm_node_flag_set_repeated_parameter(value);
13740 }
13741 pm_parser_local_add_token(parser, &name, 1);
13742 }
13743
13744 param = UP(pm_splat_node_create(parser, &star, value));
13745 } else {
13746 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13747 pm_token_t name = parser->previous;
13748
13749 param = UP(pm_required_parameter_node_create(parser, &name));
13750 if (pm_parser_parameter_name_check(parser, &name)) {
13751 pm_node_flag_set_repeated_parameter(param);
13752 }
13753 pm_parser_local_add_token(parser, &name, 1);
13754 }
13755
13756 pm_multi_target_node_targets_append(parser, node, param);
13757 } while (accept1(parser, PM_TOKEN_COMMA));
13758
13759 accept1(parser, PM_TOKEN_NEWLINE);
13760 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13761 pm_multi_target_node_closing_set(parser, node, &parser->previous);
13762
13763 return node;
13764}
13765
/**
 * The states used to validate the order in which parameters are written.
 *
 * The numeric values matter: update_parameter_state compares states with
 * relational operators and reports an ordering error when the state of an
 * incoming token is greater than the current state. The values are spelled
 * out so that this ordering is visible at a glance.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** Assigned to the `&` and `...` tokens in parameters_ordering. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** Assigned to the `**` tokens in parameters_ordering. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** Assigned to label tokens in parameters_ordering. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** Not assigned to any token by parameters_ordering. */
    PM_PARAMETERS_ORDER_REST = 4,

    /**
     * Assigned to the `*` tokens in parameters_ordering, and also entered
     * when a named parameter follows an optional one.
     */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** Assigned to the `=` token in parameters_ordering. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** Assigned to identifier and `(` tokens in parameters_ordering. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The greatest state; not assigned to any token by parameters_ordering. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13781
13785static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13786 [0] = PM_PARAMETERS_NO_CHANGE,
13787 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13788 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13789 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13790 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13791 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13792 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13793 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13794 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13795 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13796 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13797 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13798};
13799
13807static bool
13808update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13809 pm_parameters_order_t state = parameters_ordering[token->type];
13810 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13811
13812 // If we see another ordered argument after a optional argument
13813 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13814 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13815 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13816 return true;
13817 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13818 return true;
13819 }
13820
13821 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13822 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13823 return false;
13824 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13825 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13826 return false;
13827 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13828 // We know what transition we failed on, so we can provide a better error here.
13829 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13830 return false;
13831 }
13832
13833 if (state < *current) *current = state;
13834 return true;
13835}
13836
13837static inline void
13838parse_parameters_handle_trailing_comma(
13839 pm_parser_t *parser,
13840 pm_parameters_node_t *params,
13841 pm_parameters_order_t order,
13842 bool in_block,
13843 bool allows_trailing_comma
13844) {
13845 if (!allows_trailing_comma) {
13846 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13847 return;
13848 }
13849
13850 if (in_block) {
13851 if (order >= PM_PARAMETERS_ORDER_NAMED) {
13852 // foo do |bar,|; end
13853 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13854
13855 if (params->rest == NULL) {
13856 pm_parameters_node_rest_set(params, param);
13857 } else {
13858 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
13859 pm_parameters_node_posts_append(parser->arena, params, UP(param));
13860 }
13861 } else {
13862 // foo do |*bar,|; end
13863 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13864 }
13865 } else {
13866 // https://bugs.ruby-lang.org/issues/19107
13867 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
13868 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
13869 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13870 }
13871 }
13872}
13873
13877static pm_parameters_node_t *
13878parse_parameters(
13879 pm_parser_t *parser,
13880 pm_binding_power_t binding_power,
13881 bool uses_parentheses,
13882 bool allows_trailing_comma,
13883 bool allows_forwarding_parameters,
13884 bool accepts_blocks_in_defaults,
13885 bool in_block,
13886 pm_diagnostic_id_t diag_id_forwarding,
13887 uint16_t depth
13888) {
13889 pm_do_loop_stack_push(parser, false);
13890
13891 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13892 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13893
13894 while (true) {
13895 bool parsing = true;
13896
13897 switch (parser->current.type) {
13898 case PM_TOKEN_PARENTHESIS_LEFT: {
13899 update_parameter_state(parser, &parser->current, &order);
13900 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13901
13902 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13903 pm_parameters_node_requireds_append(parser->arena, params, param);
13904 } else {
13905 pm_parameters_node_posts_append(parser->arena, params, param);
13906 }
13907 break;
13908 }
13909 case PM_TOKEN_UAMPERSAND:
13910 case PM_TOKEN_AMPERSAND: {
13911 update_parameter_state(parser, &parser->current, &order);
13912 parser_lex(parser);
13913
13914 pm_token_t operator = parser->previous;
13915 pm_node_t *param;
13916
13917 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
13918 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
13919 } else {
13920 pm_token_t name = {0};
13921
13922 bool repeated = false;
13923 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13924 name = parser->previous;
13925 repeated = pm_parser_parameter_name_check(parser, &name);
13926 pm_parser_local_add_token(parser, &name, 1);
13927 } else {
13928 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
13929 }
13930
13931 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
13932 if (repeated) {
13933 pm_node_flag_set_repeated_parameter(param);
13934 }
13935 }
13936
13937 if (params->block == NULL) {
13938 pm_parameters_node_block_set(params, param);
13939 } else {
13940 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
13941 pm_parameters_node_posts_append(parser->arena, params, param);
13942 }
13943
13944 break;
13945 }
13946 case PM_TOKEN_UDOT_DOT_DOT: {
13947 if (!allows_forwarding_parameters) {
13948 pm_parser_err_current(parser, diag_id_forwarding);
13949 }
13950
13951 bool succeeded = update_parameter_state(parser, &parser->current, &order);
13952 parser_lex(parser);
13953
13954 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
13955 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13956
13957 if (params->keyword_rest != NULL) {
13958 // If we already have a keyword rest parameter, then we replace it with the
13959 // forwarding parameter and move the keyword rest parameter to the posts list.
13960 pm_node_t *keyword_rest = params->keyword_rest;
13961 pm_parameters_node_posts_append(parser->arena, params, keyword_rest);
13962 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13963 params->keyword_rest = NULL;
13964 }
13965
13966 pm_parameters_node_keyword_rest_set(params, UP(param));
13967 break;
13968 }
13969 case PM_TOKEN_CLASS_VARIABLE:
13970 case PM_TOKEN_IDENTIFIER:
13971 case PM_TOKEN_CONSTANT:
13972 case PM_TOKEN_INSTANCE_VARIABLE:
13973 case PM_TOKEN_GLOBAL_VARIABLE:
13974 case PM_TOKEN_METHOD_NAME: {
13975 parser_lex(parser);
13976 switch (parser->previous.type) {
13977 case PM_TOKEN_CONSTANT:
13978 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13979 break;
13980 case PM_TOKEN_INSTANCE_VARIABLE:
13981 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13982 break;
13983 case PM_TOKEN_GLOBAL_VARIABLE:
13984 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13985 break;
13986 case PM_TOKEN_CLASS_VARIABLE:
13987 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13988 break;
13989 case PM_TOKEN_METHOD_NAME:
13990 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
13991 break;
13992 default: break;
13993 }
13994
13995 if (parser->current.type == PM_TOKEN_EQUAL) {
13996 update_parameter_state(parser, &parser->current, &order);
13997 } else {
13998 update_parameter_state(parser, &parser->previous, &order);
13999 }
14000
14001 pm_token_t name = parser->previous;
14002 bool repeated = pm_parser_parameter_name_check(parser, &name);
14003 pm_parser_local_add_token(parser, &name, 1);
14004
14005 if (match1(parser, PM_TOKEN_EQUAL)) {
14006 pm_token_t operator = parser->current;
14007 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14008 parser_lex(parser);
14009
14010 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14011 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14012
14013 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14014 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14015 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14016
14017 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14018
14019 if (repeated) {
14020 pm_node_flag_set_repeated_parameter(UP(param));
14021 }
14022 pm_parameters_node_optionals_append(parser->arena, params, param);
14023
14024 // If the value of the parameter increased the number of
14025 // reads of that parameter, then we need to warn that we
14026 // have a circular definition.
14027 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14028 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14029 }
14030
14031 context_pop(parser);
14032
14033 // If parsing the value of the parameter resulted in error recovery,
14034 // then we can put a missing node in its place and stop parsing the
14035 // parameters entirely now.
14036 if (parser->recovering) {
14037 parsing = false;
14038 break;
14039 }
14040 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14041 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14042 if (repeated) {
14043 pm_node_flag_set_repeated_parameter(UP(param));
14044 }
14045 pm_parameters_node_requireds_append(parser->arena, params, UP(param));
14046 } else {
14047 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14048 if (repeated) {
14049 pm_node_flag_set_repeated_parameter(UP(param));
14050 }
14051 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14052 }
14053
14054 break;
14055 }
14056 case PM_TOKEN_LABEL: {
14057 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14058 update_parameter_state(parser, &parser->current, &order);
14059
14060 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14061 parser_lex(parser);
14062
14063 pm_token_t name = parser->previous;
14064 pm_token_t local = name;
14065 local.end -= 1;
14066
14067 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14068 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14069 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14070 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14071 }
14072
14073 bool repeated = pm_parser_parameter_name_check(parser, &local);
14074 pm_parser_local_add_token(parser, &local, 1);
14075
14076 switch (parser->current.type) {
14077 case PM_TOKEN_COMMA:
14078 case PM_TOKEN_PARENTHESIS_RIGHT:
14079 case PM_TOKEN_PIPE: {
14080 context_pop(parser);
14081
14082 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14083 if (repeated) {
14084 pm_node_flag_set_repeated_parameter(param);
14085 }
14086
14087 pm_parameters_node_keywords_append(parser->arena, params, param);
14088 break;
14089 }
14090 case PM_TOKEN_SEMICOLON:
14091 case PM_TOKEN_NEWLINE: {
14092 context_pop(parser);
14093
14094 if (uses_parentheses) {
14095 parsing = false;
14096 break;
14097 }
14098
14099 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14100 if (repeated) {
14101 pm_node_flag_set_repeated_parameter(param);
14102 }
14103
14104 pm_parameters_node_keywords_append(parser->arena, params, param);
14105 break;
14106 }
14107 default: {
14108 pm_node_t *param;
14109
14110 if (token_begins_expression_p(parser->current.type)) {
14111 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14112 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14113
14114 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14115 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14116 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14117
14118 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14119 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14120 }
14121
14122 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14123 }
14124 else {
14125 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14126 }
14127
14128 if (repeated) {
14129 pm_node_flag_set_repeated_parameter(param);
14130 }
14131
14132 context_pop(parser);
14133 pm_parameters_node_keywords_append(parser->arena, params, param);
14134
14135 // If parsing the value of the parameter resulted in error recovery,
14136 // then we can put a missing node in its place and stop parsing the
14137 // parameters entirely now.
14138 if (parser->recovering) {
14139 parsing = false;
14140 break;
14141 }
14142 }
14143 }
14144
14145 parser->in_keyword_arg = false;
14146 break;
14147 }
14148 case PM_TOKEN_USTAR:
14149 case PM_TOKEN_STAR: {
14150 update_parameter_state(parser, &parser->current, &order);
14151 parser_lex(parser);
14152
14153 pm_token_t operator = parser->previous;
14154 pm_token_t name = { 0 };
14155 bool repeated = false;
14156
14157 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14158 name = parser->previous;
14159 repeated = pm_parser_parameter_name_check(parser, &name);
14160 pm_parser_local_add_token(parser, &name, 1);
14161 } else {
14162 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14163 }
14164
14165 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14166 if (repeated) {
14167 pm_node_flag_set_repeated_parameter(param);
14168 }
14169
14170 if (params->rest == NULL) {
14171 pm_parameters_node_rest_set(params, param);
14172 } else {
14173 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14174 pm_parameters_node_posts_append(parser->arena, params, param);
14175 }
14176
14177 break;
14178 }
14179 case PM_TOKEN_STAR_STAR:
14180 case PM_TOKEN_USTAR_STAR: {
14181 pm_parameters_order_t previous_order = order;
14182 update_parameter_state(parser, &parser->current, &order);
14183 parser_lex(parser);
14184
14185 pm_token_t operator = parser->previous;
14186 pm_node_t *param;
14187
14188 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14189 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14190 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14191 }
14192
14193 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14194 } else {
14195 pm_token_t name = { 0 };
14196
14197 bool repeated = false;
14198 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14199 name = parser->previous;
14200 repeated = pm_parser_parameter_name_check(parser, &name);
14201 pm_parser_local_add_token(parser, &name, 1);
14202 } else {
14203 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14204 }
14205
14206 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14207 if (repeated) {
14208 pm_node_flag_set_repeated_parameter(param);
14209 }
14210 }
14211
14212 if (params->keyword_rest == NULL) {
14213 pm_parameters_node_keyword_rest_set(params, param);
14214 } else {
14215 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14216 pm_parameters_node_posts_append(parser->arena, params, param);
14217 }
14218
14219 break;
14220 }
14221 default:
14222 if (parser->previous.type == PM_TOKEN_COMMA) {
14223 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14224 }
14225
14226 parsing = false;
14227 break;
14228 }
14229
14230 // If we hit some kind of issue while parsing the parameter, this would
14231 // have been set to false. In that case, we need to break out of the
14232 // loop.
14233 if (!parsing) break;
14234
14235 bool accepted_newline = false;
14236 if (uses_parentheses) {
14237 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14238 }
14239
14240 if (accept1(parser, PM_TOKEN_COMMA)) {
14241 // If there was a comma, but we also accepted a newline, then this
14242 // is a syntax error.
14243 if (accepted_newline) {
14244 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14245 }
14246 } else {
14247 // If there was no comma, then we're done parsing parameters.
14248 break;
14249 }
14250 }
14251
14252 pm_do_loop_stack_pop(parser);
14253
14254 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14255 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14256 return NULL;
14257 }
14258
14259 return params;
14260}
14261
14266static size_t
14267token_newline_index(const pm_parser_t *parser) {
14268 if (parser->heredoc_end == NULL) {
14269 // This is the common case. In this case we can look at the previously
14270 // recorded newline in the newline list and subtract from the current
14271 // offset.
14272 return parser->line_offsets.size - 1;
14273 } else {
14274 // This is unlikely. This is the case that we have already parsed the
14275 // start of a heredoc, so we cannot rely on looking at the previous
14276 // offset of the newline list, and instead must go through the whole
14277 // process of a binary search for the line number.
14278 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14279 }
14280}
14281
14286static int64_t
14287token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14288 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14289 const uint8_t *end = token->start;
14290
14291 // Skip over the BOM if it is present.
14292 if (
14293 newline_index == 0 &&
14294 parser->start[0] == 0xef &&
14295 parser->start[1] == 0xbb &&
14296 parser->start[2] == 0xbf
14297 ) cursor += 3;
14298
14299 int64_t column = 0;
14300 for (; cursor < end; cursor++) {
14301 switch (*cursor) {
14302 case '\t':
14303 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14304 break;
14305 case ' ':
14306 column++;
14307 break;
14308 default:
14309 column++;
14310 if (break_on_non_space) return -1;
14311 break;
14312 }
14313 }
14314
14315 return column;
14316}
14317
14322static void
14323parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14324 // If these warnings are disabled (unlikely), then we can just return.
14325 if (!parser->warn_mismatched_indentation) return;
14326
14327 // If the tokens are on the same line, we do not warn.
14328 size_t closing_newline_index = token_newline_index(parser);
14329 if (opening_newline_index == closing_newline_index) return;
14330
14331 // If the opening token has anything other than spaces or tabs before it,
14332 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14333 // and the `if` immediately follows an `else` keyword.
14334 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14335 if (!if_after_else && (opening_column == -1)) return;
14336
14337 // Get a reference to the closing token off the current parser. This assumes
14338 // that the caller has placed this in the correct position.
14339 pm_token_t *closing_token = &parser->current;
14340
14341 // If the tokens are at the same indentation, we do not warn.
14342 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14343 if ((closing_column == -1) || (opening_column == closing_column)) return;
14344
14345 // If the closing column is greater than the opening column and we are
14346 // allowing indentation, then we do not warn.
14347 if (allow_indent && (closing_column > opening_column)) return;
14348
14349 // Otherwise, add a warning.
14350 PM_PARSER_WARN_FORMAT(
14351 parser,
14352 PM_TOKEN_START(parser, closing_token),
14353 PM_TOKEN_LENGTH(closing_token),
14354 PM_WARN_INDENTATION_MISMATCH,
14355 (int) (closing_token->end - closing_token->start),
14356 (const char *) closing_token->start,
14357 (int) (opening_token->end - opening_token->start),
14358 (const char *) opening_token->start,
14359 ((int32_t) opening_newline_index) + parser->start_line
14360 );
14361}
14362
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * It selects which parsing context is used for the statements inside each
 * clause.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14372
14377static inline void
14378parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14379 pm_rescue_node_t *current = NULL;
14380
14381 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14382 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14383 parser_lex(parser);
14384
14385 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14386
14387 switch (parser->current.type) {
14388 case PM_TOKEN_EQUAL_GREATER: {
14389 // Here we have an immediate => after the rescue keyword, in which case
14390 // we're going to have an empty list of exceptions to rescue (which
14391 // implies StandardError).
14392 parser_lex(parser);
14393 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14394
14395 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14396 reference = parse_target(parser, reference, false, false);
14397
14398 pm_rescue_node_reference_set(rescue, reference);
14399 break;
14400 }
14401 case PM_TOKEN_NEWLINE:
14402 case PM_TOKEN_SEMICOLON:
14403 case PM_TOKEN_KEYWORD_THEN:
14404 // Here we have a terminator for the rescue keyword, in which
14405 // case we're going to just continue on.
14406 break;
14407 default: {
14408 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14409 // Here we have something that could be an exception expression, so
14410 // we'll attempt to parse it here and any others delimited by commas.
14411
14412 do {
14413 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14414 pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
14415
14416 // If we hit a newline, then this is the end of the rescue expression. We
14417 // can continue on to parse the statements.
14418 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14419
14420 // If we hit a `=>` then we're going to parse the exception variable. Once
14421 // we've done that, we'll break out of the loop and parse the statements.
14422 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14423 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14424
14425 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14426 reference = parse_target(parser, reference, false, false);
14427
14428 pm_rescue_node_reference_set(rescue, reference);
14429 break;
14430 }
14431 } while (accept1(parser, PM_TOKEN_COMMA));
14432 }
14433 }
14434 }
14435
14436 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14437 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14438 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14439 }
14440 } else {
14441 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14442 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14443 }
14444
14445 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14446 pm_accepts_block_stack_push(parser, true);
14447 pm_context_t context;
14448
14449 switch (type) {
14450 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14451 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14452 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14453 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14454 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14455 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14456 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14457 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14458 }
14459
14460 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14461 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14462
14463 pm_accepts_block_stack_pop(parser);
14464 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14465 }
14466
14467 if (current == NULL) {
14468 pm_begin_node_rescue_clause_set(parent_node, rescue);
14469 } else {
14470 pm_rescue_node_subsequent_set(current, rescue);
14471 }
14472
14473 current = rescue;
14474 }
14475
14476 // The end node locations on rescue nodes will not be set correctly
14477 // since we won't know the end until we've found all subsequent
14478 // clauses. This sets the end location on all rescues once we know it.
14479 if (current != NULL) {
14480 pm_rescue_node_t *clause = parent_node->rescue_clause;
14481
14482 while (clause != NULL) {
14483 PM_NODE_LENGTH_SET_NODE(clause, current);
14484 clause = clause->subsequent;
14485 }
14486 }
14487
14488 pm_token_t else_keyword;
14489 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14490 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14491 opening_newline_index = token_newline_index(parser);
14492
14493 else_keyword = parser->current;
14494 opening = &else_keyword;
14495
14496 parser_lex(parser);
14497 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14498
14499 pm_statements_node_t *else_statements = NULL;
14500 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14501 pm_accepts_block_stack_push(parser, true);
14502 pm_context_t context;
14503
14504 switch (type) {
14505 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14506 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14507 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14508 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14509 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14510 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14511 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14512 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14513 }
14514
14515 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14516 pm_accepts_block_stack_pop(parser);
14517
14518 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14519 }
14520
14521 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14522 pm_begin_node_else_clause_set(parent_node, else_clause);
14523
14524 // If we don't have a `current` rescue node, then this is a dangling
14525 // else, and it's an error.
14526 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14527 }
14528
14529 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14530 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14531 pm_token_t ensure_keyword = parser->current;
14532
14533 parser_lex(parser);
14534 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14535
14536 pm_statements_node_t *ensure_statements = NULL;
14537 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14538 pm_accepts_block_stack_push(parser, true);
14539 pm_context_t context;
14540
14541 switch (type) {
14542 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14543 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14544 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14545 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14546 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14547 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14548 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14549 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14550 }
14551
14552 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14553 pm_accepts_block_stack_pop(parser);
14554
14555 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14556 }
14557
14558 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14559 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14560 }
14561
14562 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14563 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14564 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14565 } else {
14566 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14567 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14568 }
14569}
14570
14575static pm_begin_node_t *
14576parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14577 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14578 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14579
14580 node->base.location.start = U32(start - parser->start);
14581 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14582
14583 return node;
14584}
14585
14590parse_block_parameters(
14591 pm_parser_t *parser,
14592 bool allows_trailing_comma,
14593 const pm_token_t *opening,
14594 bool is_lambda_literal,
14595 bool accepts_blocks_in_defaults,
14596 uint16_t depth
14597) {
14598 pm_parameters_node_t *parameters = NULL;
14599 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14600 if (!is_lambda_literal) {
14601 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14602 }
14603 parameters = parse_parameters(
14604 parser,
14605 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14606 false,
14607 allows_trailing_comma,
14608 false,
14609 accepts_blocks_in_defaults,
14610 true,
14611 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14612 (uint16_t) (depth + 1)
14613 );
14614 if (!is_lambda_literal) {
14615 context_pop(parser);
14616 }
14617 }
14618
14619 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14620 if (opening != NULL) {
14621 accept1(parser, PM_TOKEN_NEWLINE);
14622
14623 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14624 do {
14625 switch (parser->current.type) {
14626 case PM_TOKEN_CONSTANT:
14627 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14628 parser_lex(parser);
14629 break;
14630 case PM_TOKEN_INSTANCE_VARIABLE:
14631 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14632 parser_lex(parser);
14633 break;
14634 case PM_TOKEN_GLOBAL_VARIABLE:
14635 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14636 parser_lex(parser);
14637 break;
14638 case PM_TOKEN_CLASS_VARIABLE:
14639 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14640 parser_lex(parser);
14641 break;
14642 default:
14643 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14644 break;
14645 }
14646
14647 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14648 pm_parser_local_add_token(parser, &parser->previous, 1);
14649
14650 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14651 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14652
14653 pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
14654 } while (accept1(parser, PM_TOKEN_COMMA));
14655 }
14656 }
14657
14658 return block_parameters;
14659}
14660
14665static bool
14666outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14667 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14668 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14669 }
14670
14671 return false;
14672}
14673
/**
 * The names "_1" through "_9", stored so that the name for number N is found
 * at index N - 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
14682
14688static pm_node_t *
14689parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14690 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14691
14692 // If we have ordinary parameters, then we will return them as the set of
14693 // parameters.
14694 if (parameters != NULL) {
14695 // If we also have implicit parameters, then this is an error.
14696 if (implicit_parameters->size > 0) {
14697 pm_node_t *node = implicit_parameters->nodes[0];
14698
14699 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14700 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14701 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14702 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14703 } else {
14704 assert(false && "unreachable");
14705 }
14706 }
14707
14708 return parameters;
14709 }
14710
14711 // If we don't have any implicit parameters, then the set of parameters is
14712 // NULL.
14713 if (implicit_parameters->size == 0) {
14714 return NULL;
14715 }
14716
14717 // If we don't have ordinary parameters, then we now must validate our set
14718 // of implicit parameters. We can only have numbered parameters or it, but
14719 // they cannot be mixed.
14720 uint8_t numbered_parameter = 0;
14721 bool it_parameter = false;
14722
14723 for (size_t index = 0; index < implicit_parameters->size; index++) {
14724 pm_node_t *node = implicit_parameters->nodes[index];
14725
14726 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14727 if (it_parameter) {
14728 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14729 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14730 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14731 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14732 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14733 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
14734 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
14735 } else {
14736 assert(false && "unreachable");
14737 }
14738 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14739 if (numbered_parameter > 0) {
14740 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14741 } else {
14742 it_parameter = true;
14743 }
14744 }
14745 }
14746
14747 if (numbered_parameter > 0) {
14748 // Go through the parent scopes and mark them as being disallowed from
14749 // using numbered parameters because this inner scope is using them.
14750 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14751 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14752 }
14753 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
14754 }
14755
14756 if (it_parameter) {
14757 return UP(pm_it_parameters_node_create(parser, opening, closing));
14758 }
14759
14760 return NULL;
14761}
14762
14766static pm_block_node_t *
14767parse_block(pm_parser_t *parser, uint16_t depth) {
14768 pm_token_t opening = parser->previous;
14769 accept1(parser, PM_TOKEN_NEWLINE);
14770
14771 pm_accepts_block_stack_push(parser, true);
14772 pm_parser_scope_push(parser, false);
14773
14774 pm_block_parameters_node_t *block_parameters = NULL;
14775
14776 if (accept1(parser, PM_TOKEN_PIPE)) {
14777 pm_token_t block_parameters_opening = parser->previous;
14778 if (match1(parser, PM_TOKEN_PIPE)) {
14779 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14780 parser->command_start = true;
14781 parser_lex(parser);
14782 } else {
14783 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14784 accept1(parser, PM_TOKEN_NEWLINE);
14785 parser->command_start = true;
14786 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14787 }
14788
14789 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
14790 }
14791
14792 accept1(parser, PM_TOKEN_NEWLINE);
14793 pm_node_t *statements = NULL;
14794
14795 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14796 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14797 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14798 }
14799
14800 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
14801 } else {
14802 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14803 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14804 pm_accepts_block_stack_push(parser, true);
14805 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14806 pm_accepts_block_stack_pop(parser);
14807 }
14808
14809 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14810 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14811 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14812 }
14813 }
14814
14815 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
14816 }
14817
14818 pm_constant_id_list_t locals;
14819 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14820 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14821
14822 pm_parser_scope_pop(parser);
14823 pm_accepts_block_stack_pop(parser);
14824
14825 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14826}
14827
14833static bool
14834parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14835 bool found = false;
14836
14837 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14838 found |= true;
14839 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
14840
14841 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14842 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14843 } else {
14844 pm_accepts_block_stack_push(parser, true);
14845 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14846
14847 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14848 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14849 parser->previous.start = parser->previous.end;
14850 parser->previous.type = 0;
14851 }
14852
14853 pm_accepts_block_stack_pop(parser);
14854 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14855 }
14856 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14857 found |= true;
14858 pm_accepts_block_stack_push(parser, false);
14859
14860 // If we get here, then the subsequent token cannot be used as an infix
14861 // operator. In this case we assume the subsequent token is part of an
14862 // argument to this method call.
14863 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14864
14865 // If we have done with the arguments and still not consumed the comma,
14866 // then we have a trailing comma where we need to check whether it is
14867 // allowed or not.
14868 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14869 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14870 }
14871
14872 pm_accepts_block_stack_pop(parser);
14873 }
14874
14875 // If we're at the end of the arguments, we can now check if there is a block
14876 // node that starts with a {. If there is, then we can parse it and add it to
14877 // the arguments.
14878 if (accepts_block) {
14879 pm_block_node_t *block = NULL;
14880
14881 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14882 found |= true;
14883 block = parse_block(parser, (uint16_t) (depth + 1));
14884 pm_arguments_validate_block(parser, arguments, block);
14885 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14886 found |= true;
14887 block = parse_block(parser, (uint16_t) (depth + 1));
14888 }
14889
14890 if (block != NULL) {
14891 if (arguments->block == NULL && !arguments->has_forwarding) {
14892 arguments->block = UP(block);
14893 } else {
14894 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14895
14896 if (arguments->block != NULL) {
14897 if (arguments->arguments == NULL) {
14898 arguments->arguments = pm_arguments_node_create(parser);
14899 }
14900 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
14901 }
14902 arguments->block = UP(block);
14903 }
14904 }
14905 }
14906
14907 return found;
14908}
14909
/**
 * Validate that a return node appears somewhere a return is allowed, adding
 * PM_ERR_RETURN_INVALID to the parser when it does not.
 *
 * The context stack is walked from the innermost context outwards:
 * - the "see-through" contexts simply continue the walk,
 * - PM_CONTEXT_SCLASS is remembered and the walk continues,
 * - PM_CONTEXT_CLASS / PM_CONTEXT_MODULE report the error immediately,
 * - PM_CONTEXT_DEF ends the walk without an error.
 *
 * If the walk runs off the end of the stack after passing through a singleton
 * class, the error is reported only for parser versions >= CRuby 3.4.
 *
 * NOTE(review): the listing numbers embedded in this block skip values inside
 * the switch, so some case labels may be missing from this copy. Confirm the
 * full label lists against the upstream file before relying on them.
 */
14914static void
14915parse_return(pm_parser_t *parser, pm_node_t *node) {
 // Set once a singleton-class context has been seen on the way out.
14916 bool in_sclass = false;
14917 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14918 switch (context_node->context) {
14922 case PM_CONTEXT_BEGIN:
14923 case PM_CONTEXT_CASE_IN:
14926 case PM_CONTEXT_DEFINED:
14927 case PM_CONTEXT_ELSE:
14928 case PM_CONTEXT_ELSIF:
14929 case PM_CONTEXT_EMBEXPR:
14931 case PM_CONTEXT_FOR:
14932 case PM_CONTEXT_IF:
14934 case PM_CONTEXT_MAIN:
14936 case PM_CONTEXT_PARENS:
14937 case PM_CONTEXT_POSTEXE:
14939 case PM_CONTEXT_PREEXE:
14941 case PM_CONTEXT_TERNARY:
14942 case PM_CONTEXT_UNLESS:
14943 case PM_CONTEXT_UNTIL:
14944 case PM_CONTEXT_WHILE:
14945 // Keep iterating up the lists of contexts, because returns can
14946 // see through these.
14947 continue;
14951 case PM_CONTEXT_SCLASS:
 // Not an error by itself; the decision is deferred until the
 // walk finishes (see the check after the loop).
14952 in_sclass = true;
14953 continue;
14957 case PM_CONTEXT_CLASS:
14961 case PM_CONTEXT_MODULE:
14962 // These contexts are invalid for a return.
14963 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14964 return;
14975 case PM_CONTEXT_DEF:
14981 // These contexts are valid for a return, and we should not
14982 // continue to loop.
14983 return;
14984 case PM_CONTEXT_NONE:
14985 // This case should never happen.
14986 assert(false && "unreachable");
14987 break;
14988 }
14989 }
 // Reached only when no context above ended the walk early.
14990 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
14991 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14992 }
14993}
14994
/**
 * Decide whether a block exit node is allowed where it appears, by walking
 * the context stack from the innermost context outwards.
 *
 * - In the "good" contexts the function returns without doing anything.
 * - In the "bad" contexts the node is appended to parser->current_block_exits
 *   rather than reported, so that it can be validated later.
 * - In the remaining contexts the walk continues to the enclosing context.
 *
 * This function never adds a diagnostic itself.
 *
 * NOTE(review): the listing numbers embedded in this block skip values inside
 * the switch, so some case labels may be missing from this copy. Confirm the
 * full label lists against the upstream file before relying on them.
 */
14999static void
15000parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15001 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15002 switch (context_node->context) {
15009 case PM_CONTEXT_DEFINED:
15010 case PM_CONTEXT_FOR:
15017 case PM_CONTEXT_POSTEXE:
15018 case PM_CONTEXT_UNTIL:
15019 case PM_CONTEXT_WHILE:
15020 // These are the good cases. We're allowed to have a block exit
15021 // in these contexts.
15022 return;
15023 case PM_CONTEXT_DEF:
15028 case PM_CONTEXT_MAIN:
15029 case PM_CONTEXT_PREEXE:
15030 case PM_CONTEXT_SCLASS:
15034 // These are the bad cases. We're not allowed to have a block
15035 // exit in these contexts.
15036 //
15037 // If we get here, then we're about to mark this block exit
15038 // as invalid. However, it could later _become_ valid if we
15039 // find a trailing while/until on the expression. In this
15040 // case instead of adding the error here, we'll add the
15041 // block exit to the list of exits for the expression, and
15042 // the node parsing will handle validating it instead.
15043 assert(parser->current_block_exits != NULL);
15044 pm_node_list_append(parser->arena, parser->current_block_exits, node);
15045 return;
15049 case PM_CONTEXT_BEGIN:
15050 case PM_CONTEXT_CASE_IN:
15055 case PM_CONTEXT_CLASS:
15057 case PM_CONTEXT_ELSE:
15058 case PM_CONTEXT_ELSIF:
15059 case PM_CONTEXT_EMBEXPR:
15061 case PM_CONTEXT_IF:
15065 case PM_CONTEXT_MODULE:
15067 case PM_CONTEXT_PARENS:
15070 case PM_CONTEXT_TERNARY:
15071 case PM_CONTEXT_UNLESS:
15072 // In these contexts we should continue walking up the list of
15073 // contexts.
15074 break;
15075 case PM_CONTEXT_NONE:
15076 // This case should never happen.
15077 assert(false && "unreachable");
15078 break;
15079 }
15080 }
15081}
15082
15087static pm_node_list_t *
15088push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15089 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15090 parser->current_block_exits = current_block_exits;
15091 return previous_block_exits;
15092}
15093
15099static void
15100flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15101 pm_node_t *block_exit;
15102 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15103 const char *type;
15104
15105 switch (PM_NODE_TYPE(block_exit)) {
15106 case PM_BREAK_NODE: type = "break"; break;
15107 case PM_NEXT_NODE: type = "next"; break;
15108 case PM_REDO_NODE: type = "redo"; break;
15109 default: assert(false && "unreachable"); type = ""; break;
15110 }
15111
15112 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15113 }
15114
15115 parser->current_block_exits = previous_block_exits;
15116}
15117
15122static void
15123pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15124 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15125 // If we matched a trailing while/until, then all of the block exits in
15126 // the contained list are valid. In this case we do not need to do
15127 // anything.
15128 parser->current_block_exits = previous_block_exits;
15129 } else if (previous_block_exits != NULL) {
15130 // If we did not matching a trailing while/until, then all of the block
15131 // exits contained in the list are invalid for this specific context.
15132 // However, they could still become valid in a higher level context if
15133 // there is another list above this one. In this case we'll push all of
15134 // the block exits up to the previous list.
15135 pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
15136 parser->current_block_exits = previous_block_exits;
15137 } else {
15138 // If we did not match a trailing while/until and this was the last
15139 // chance to do so, then all of the block exits in the list are invalid
15140 // and we need to add an error for each of them.
15141 flush_block_exits(parser, previous_block_exits);
15142 }
15143}
15144
15145static inline pm_node_t *
15146parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15147 context_push(parser, PM_CONTEXT_PREDICATE);
15148 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15149 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15150
15151 // Predicates are closed by a term, a "then", or a term and then a "then".
15152 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15153
15154 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15155 predicate_closed = true;
15156 *then_keyword = parser->previous;
15157 }
15158
15159 if (!predicate_closed) {
15160 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15161 }
15162
15163 context_pop(parser);
15164 return predicate;
15165}
15166
15167static inline pm_node_t *
15168parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15169 pm_node_list_t current_block_exits = { 0 };
15170 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15171
15172 pm_token_t keyword = parser->previous;
15173 pm_token_t then_keyword = { 0 };
15174
15175 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15176 pm_statements_node_t *statements = NULL;
15177
15178 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15179 pm_accepts_block_stack_push(parser, true);
15180 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15181 pm_accepts_block_stack_pop(parser);
15182 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15183 }
15184
15185 pm_node_t *parent = NULL;
15186
15187 switch (context) {
15188 case PM_CONTEXT_IF:
15189 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15190 break;
15191 case PM_CONTEXT_UNLESS:
15192 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15193 break;
15194 default:
15195 assert(false && "unreachable");
15196 break;
15197 }
15198
15199 pm_node_t *current = parent;
15200
15201 // Parse any number of elsif clauses. This will form a linked list of if
15202 // nodes pointing to each other from the top.
15203 if (context == PM_CONTEXT_IF) {
15204 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15205 if (parser_end_of_line_p(parser)) {
15206 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15207 }
15208
15209 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15210 pm_token_t elsif_keyword = parser->current;
15211 parser_lex(parser);
15212
15213 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15214 pm_accepts_block_stack_push(parser, true);
15215
15216 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15217 pm_accepts_block_stack_pop(parser);
15218 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15219
15220 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15221 ((pm_if_node_t *) current)->subsequent = elsif;
15222 current = elsif;
15223 }
15224 }
15225
15226 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15227 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15228 opening_newline_index = token_newline_index(parser);
15229
15230 parser_lex(parser);
15231 pm_token_t else_keyword = parser->previous;
15232
15233 pm_accepts_block_stack_push(parser, true);
15234 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15235 pm_accepts_block_stack_pop(parser);
15236
15237 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15238 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15239 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15240
15241 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15242
15243 switch (context) {
15244 case PM_CONTEXT_IF:
15245 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15246 break;
15247 case PM_CONTEXT_UNLESS:
15248 ((pm_unless_node_t *) parent)->else_clause = else_node;
15249 break;
15250 default:
15251 assert(false && "unreachable");
15252 break;
15253 }
15254 } else {
15255 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15256 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15257 }
15258
15259 // Set the appropriate end location for all of the nodes in the subtree.
15260 switch (context) {
15261 case PM_CONTEXT_IF: {
15262 pm_node_t *current = parent;
15263 bool recursing = true;
15264
15265 while (recursing) {
15266 switch (PM_NODE_TYPE(current)) {
15267 case PM_IF_NODE:
15268 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15269 current = ((pm_if_node_t *) current)->subsequent;
15270 recursing = current != NULL;
15271 break;
15272 case PM_ELSE_NODE:
15273 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15274 recursing = false;
15275 break;
15276 default: {
15277 recursing = false;
15278 break;
15279 }
15280 }
15281 }
15282 break;
15283 }
15284 case PM_CONTEXT_UNLESS:
15285 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15286 break;
15287 default:
15288 assert(false && "unreachable");
15289 break;
15290 }
15291
15292 pop_block_exits(parser, previous_block_exits);
15293 return parent;
15294}
15295
/**
 * A list of case labels covering the keyword token types below. The leading
 * `case` and the trailing colon are deliberately omitted so that the macro is
 * written as `case PM_CASE_KEYWORD:` inside a switch.
 */
#define PM_CASE_KEYWORD \
         PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15311
/**
 * A list of case labels covering the operator token types below. The leading
 * `case` and the trailing colon are deliberately omitted so that the macro is
 * written as `case PM_CASE_OPERATOR:` inside a switch.
 */
#define PM_CASE_OPERATOR \
         PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15324
/**
 * A list of case labels covering the token types below (numeric literals, the
 * opening tokens of strings, symbols, regular expressions and %-literals, a
 * handful of value keywords, and a few others). The leading `case` and the
 * trailing colon are deliberately omitted so that the macro is written as
 * `case PM_CASE_PRIMITIVE:` inside a switch.
 */
#define PM_CASE_PRIMITIVE \
         PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15339
/**
 * A list of case labels covering the token types below. Judging by the name
 * these are the tokens that can begin a parameter (TODO confirm against the
 * callers). The leading `case` and the trailing colon are deliberately omitted
 * so that the macro is written as `case PM_CASE_PARAMETER:` inside a switch.
 */
#define PM_CASE_PARAMETER \
         PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15348
/**
 * A list of case labels covering the node types below. Judging by the name
 * these are the nodes that can be turned into write targets (TODO confirm
 * against the callers). The leading `case` and the trailing colon are
 * deliberately omitted so that the macro is written as
 * `case PM_CASE_WRITABLE:` inside a switch.
 */
#define PM_CASE_WRITABLE \
         PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15357
// Compile-time check that PM_STRING_FLAGS_FORCED_UTF8_ENCODING and
// PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING have the same integer value. Both are
// cast to int so that values of two different flag types can be compared.
15358// Assert here that the flags are the same so that we can safely switch the type
15359// of the node without having to move the flags.
15360PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15361
15366static inline pm_node_flags_t
15367parse_unescaped_encoding(const pm_parser_t *parser) {
15368 if (parser->explicit_encoding != NULL) {
15370 // If the there's an explicit encoding and it's using a UTF-8 escape
15371 // sequence, then mark the string as UTF-8.
15372 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15373 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15374 // If there's a non-UTF-8 escape sequence being used, then the
15375 // string uses the source encoding, unless the source is marked as
15376 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15377 // order to keep the string valid.
15378 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15379 }
15380 }
15381 return 0;
15382}
15383
15388static pm_node_t *
15389parse_string_part(pm_parser_t *parser, uint16_t depth) {
15390 switch (parser->current.type) {
15391 // Here the lexer has returned to us plain string content. In this case
15392 // we'll create a string node that has no opening or closing and return that
15393 // as the part. These kinds of parts look like:
15394 //
15395 // "aaa #{bbb} #@ccc ddd"
15396 // ^^^^ ^ ^^^^
15397 case PM_TOKEN_STRING_CONTENT: {
15398 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15399 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15400
15401 parser_lex(parser);
15402 return node;
15403 }
15404 // Here the lexer has returned the beginning of an embedded expression. In
15405 // that case we'll parse the inner statements and return that as the part.
15406 // These kinds of parts look like:
15407 //
15408 // "aaa #{bbb} #@ccc ddd"
15409 // ^^^^^^
15410 case PM_TOKEN_EMBEXPR_BEGIN: {
15411 // Ruby disallows seeing encoding around interpolation in strings,
15412 // even though it is known at parse time.
15413 parser->explicit_encoding = NULL;
15414
15415 pm_lex_state_t state = parser->lex_state;
15416 int brace_nesting = parser->brace_nesting;
15417
15418 parser->brace_nesting = 0;
15419 lex_state_set(parser, PM_LEX_STATE_BEG);
15420 parser_lex(parser);
15421
15422 pm_token_t opening = parser->previous;
15423 pm_statements_node_t *statements = NULL;
15424
15425 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15426 pm_accepts_block_stack_push(parser, true);
15427 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15428 pm_accepts_block_stack_pop(parser);
15429 }
15430
15431 parser->brace_nesting = brace_nesting;
15432 lex_state_set(parser, state);
15433 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15434
15435 // If this set of embedded statements only contains a single
15436 // statement, then Ruby does not consider it as a possible statement
15437 // that could emit a line event.
15438 if (statements != NULL && statements->body.size == 1) {
15439 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15440 }
15441
15442 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15443 }
15444
15445 // Here the lexer has returned the beginning of an embedded variable.
15446 // In that case we'll parse the variable and create an appropriate node
15447 // for it and then return that node. These kinds of parts look like:
15448 //
15449 // "aaa #{bbb} #@ccc ddd"
15450 // ^^^^^
15451 case PM_TOKEN_EMBVAR: {
15452 // Ruby disallows seeing encoding around interpolation in strings,
15453 // even though it is known at parse time.
15454 parser->explicit_encoding = NULL;
15455
15456 lex_state_set(parser, PM_LEX_STATE_BEG);
15457 parser_lex(parser);
15458
15459 pm_token_t operator = parser->previous;
15460 pm_node_t *variable;
15461
15462 switch (parser->current.type) {
15463 // In this case a back reference is being interpolated. We'll
15464 // create a global variable read node.
15465 case PM_TOKEN_BACK_REFERENCE:
15466 parser_lex(parser);
15467 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15468 break;
15469 // In this case an nth reference is being interpolated. We'll
15470 // create a global variable read node.
15471 case PM_TOKEN_NUMBERED_REFERENCE:
15472 parser_lex(parser);
15473 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15474 break;
15475 // In this case a global variable is being interpolated. We'll
15476 // create a global variable read node.
15477 case PM_TOKEN_GLOBAL_VARIABLE:
15478 parser_lex(parser);
15479 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15480 break;
15481 // In this case an instance variable is being interpolated.
15482 // We'll create an instance variable read node.
15483 case PM_TOKEN_INSTANCE_VARIABLE:
15484 parser_lex(parser);
15485 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15486 break;
15487 // In this case a class variable is being interpolated. We'll
15488 // create a class variable read node.
15489 case PM_TOKEN_CLASS_VARIABLE:
15490 parser_lex(parser);
15491 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15492 break;
15493 // We can hit here if we got an invalid token. In that case
15494 // we'll not attempt to lex this token and instead just return a
15495 // missing node.
15496 default:
15497 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15498 variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15499 break;
15500 }
15501
15502 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15503 }
15504 default:
15505 parser_lex(parser);
15506 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15507 return NULL;
15508 }
15509}
15510
15516static const uint8_t *
15517parse_operator_symbol_name(const pm_token_t *name) {
15518 switch (name->type) {
15519 case PM_TOKEN_TILDE:
15520 case PM_TOKEN_BANG:
15521 if (name->end[-1] == '@') return name->end - 1;
15523 default:
15524 return name->end;
15525 }
15526}
15527
15528static pm_node_t *
15529parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15530 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15531 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15532
15533 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15534 parser_lex(parser);
15535
15536 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15537 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15538
15539 return UP(symbol);
15540}
15541
15547static pm_node_t *
15548parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15549 const pm_token_t opening = parser->previous;
15550
15551 if (lex_mode->mode != PM_LEX_STRING) {
15552 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15553
15554 switch (parser->current.type) {
15555 case PM_CASE_OPERATOR:
15556 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15557 case PM_TOKEN_IDENTIFIER:
15558 case PM_TOKEN_CONSTANT:
15559 case PM_TOKEN_INSTANCE_VARIABLE:
15560 case PM_TOKEN_METHOD_NAME:
15561 case PM_TOKEN_CLASS_VARIABLE:
15562 case PM_TOKEN_GLOBAL_VARIABLE:
15563 case PM_TOKEN_NUMBERED_REFERENCE:
15564 case PM_TOKEN_BACK_REFERENCE:
15565 case PM_CASE_KEYWORD:
15566 parser_lex(parser);
15567 break;
15568 default:
15569 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15570 break;
15571 }
15572
15573 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15574 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15575 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15576
15577 return UP(symbol);
15578 }
15579
15580 if (lex_mode->as.string.interpolation) {
15581 // If we have the end of the symbol, then we can return an empty symbol.
15582 if (match1(parser, PM_TOKEN_STRING_END)) {
15583 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15584 parser_lex(parser);
15585 pm_token_t content = {
15586 .type = PM_TOKEN_STRING_CONTENT,
15587 .start = parser->previous.start,
15588 .end = parser->previous.start
15589 };
15590
15591 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15592 }
15593
15594 // Now we can parse the first part of the symbol.
15595 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15596
15597 // If we got a string part, then it's possible that we could transform
15598 // what looks like an interpolated symbol into a regular symbol.
15599 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15600 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15601 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15602
15603 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15604 }
15605
15606 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15607 if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15608
15609 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15610 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15611 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15612 }
15613 }
15614
15615 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15616 if (match1(parser, PM_TOKEN_EOF)) {
15617 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15618 } else {
15619 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15620 }
15621
15622 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15623 return UP(symbol);
15624 }
15625
15626 pm_token_t content;
15627 pm_string_t unescaped;
15628
15629 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15630 content = parser->current;
15631 unescaped = parser->current_string;
15632 parser_lex(parser);
15633
15634 // If we have two string contents in a row, then the content of this
15635 // symbol is split because of heredoc contents. This looks like:
15636 //
15637 // <<A; :'a
15638 // A
15639 // b'
15640 //
15641 // In this case, the best way we have to represent this is as an
15642 // interpolated string node, so that's what we'll do here.
15643 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15644 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15645 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15646 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15647
15648 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15649 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15650
15651 if (next_state != PM_LEX_STATE_NONE) {
15652 lex_state_set(parser, next_state);
15653 }
15654
15655 parser_lex(parser);
15656 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15657
15658 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15659 return UP(symbol);
15660 }
15661 } else {
15662 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15663 pm_string_shared_init(&unescaped, content.start, content.end);
15664 }
15665
15666 if (next_state != PM_LEX_STATE_NONE) {
15667 lex_state_set(parser, next_state);
15668 }
15669
15670 if (match1(parser, PM_TOKEN_EOF)) {
15671 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15672 } else {
15673 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15674 }
15675
15676 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15677}
15678
15683static inline pm_node_t *
15684parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15685 switch (parser->current.type) {
15686 case PM_CASE_OPERATOR:
15687 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
15688 case PM_CASE_KEYWORD:
15689 case PM_TOKEN_CONSTANT:
15690 case PM_TOKEN_IDENTIFIER:
15691 case PM_TOKEN_METHOD_NAME: {
15692 parser_lex(parser);
15693
15694 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15695 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15696 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15697
15698 return UP(symbol);
15699 }
15700 case PM_TOKEN_SYMBOL_BEGIN: {
15701 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15702 parser_lex(parser);
15703
15704 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15705 }
15706 default:
15707 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15708 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15709 }
15710}
15711
15718static inline pm_node_t *
15719parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15720 switch (parser->current.type) {
15721 case PM_CASE_OPERATOR:
15722 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15723 case PM_CASE_KEYWORD:
15724 case PM_TOKEN_CONSTANT:
15725 case PM_TOKEN_IDENTIFIER:
15726 case PM_TOKEN_METHOD_NAME: {
15727 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15728 parser_lex(parser);
15729
15730 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15731 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15732 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15733
15734 return UP(symbol);
15735 }
15736 case PM_TOKEN_SYMBOL_BEGIN: {
15737 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15738 parser_lex(parser);
15739
15740 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15741 }
15742 case PM_TOKEN_BACK_REFERENCE:
15743 parser_lex(parser);
15744 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15745 case PM_TOKEN_NUMBERED_REFERENCE:
15746 parser_lex(parser);
15747 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15748 case PM_TOKEN_GLOBAL_VARIABLE:
15749 parser_lex(parser);
15750 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15751 default:
15752 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15753 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15754 }
15755}
15756
15761static pm_node_t *
15762parse_variable(pm_parser_t *parser) {
15763 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15764 int depth;
15765 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
15766
15767 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15768 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15769 }
15770
15771 pm_scope_t *current_scope = parser->current_scope;
15772 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15773 if (is_numbered_param) {
15774 // When you use a numbered parameter, it implies the existence of
15775 // all of the locals that exist before it. For example, referencing
15776 // _2 means that _1 must exist. Therefore here we loop through all
15777 // of the possibilities and add them into the constant pool.
15778 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15779 for (uint8_t number = 1; number <= maximum; number++) {
15780 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15781 }
15782
15783 if (!match1(parser, PM_TOKEN_EQUAL)) {
15784 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15785 }
15786
15787 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15788 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
15789
15790 return node;
15791 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15792 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15793 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
15794
15795 return node;
15796 }
15797 }
15798
15799 return NULL;
15800}
15801
15805static pm_node_t *
15806parse_variable_call(pm_parser_t *parser) {
15807 pm_node_flags_t flags = 0;
15808
15809 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15810 pm_node_t *node = parse_variable(parser);
15811 if (node != NULL) return node;
15812 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15813 }
15814
15815 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15816 pm_node_flag_set(UP(node), flags);
15817
15818 return UP(node);
15819}
15820
15826static inline pm_token_t
15827parse_method_definition_name(pm_parser_t *parser) {
15828 switch (parser->current.type) {
15829 case PM_CASE_KEYWORD:
15830 case PM_TOKEN_CONSTANT:
15831 case PM_TOKEN_METHOD_NAME:
15832 parser_lex(parser);
15833 return parser->previous;
15834 case PM_TOKEN_IDENTIFIER:
15835 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
15836 parser_lex(parser);
15837 return parser->previous;
15838 case PM_CASE_OPERATOR:
15839 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15840 parser_lex(parser);
15841 return parser->previous;
15842 default:
15843 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15844 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
15845 }
15846}
15847
15848static void
15849parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
15850 // Make a writable copy in the arena if the string isn't already writable.
15851 // We keep a mutable pointer to the arena memory so we can memmove into it
15852 // below without casting away const from the string's source field.
15853 uint8_t *writable;
15854
15855 if (string->type != PM_STRING_OWNED) {
15856 size_t length = pm_string_length(string);
15857 writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
15858 pm_string_constant_init(string, (const char *) writable, length);
15859 } else {
15860 writable = (uint8_t *) string->source;
15861 }
15862
15863 // Now get the bounds of the existing string. We'll use this as a
15864 // destination to move bytes into. We'll also use it for bounds checking
15865 // since we don't require that these strings be null terminated.
15866 size_t dest_length = pm_string_length(string);
15867 const uint8_t *source_cursor = writable;
15868 const uint8_t *source_end = source_cursor + dest_length;
15869
15870 // We're going to move bytes backward in the string when we get leading
15871 // whitespace, so we'll maintain a pointer to the current position in the
15872 // string that we're writing to.
15873 size_t trimmed_whitespace = 0;
15874
15875 // While we haven't reached the amount of common whitespace that we need to
15876 // trim and we haven't reached the end of the string, we'll keep trimming
15877 // whitespace. Trimming in this context means skipping over these bytes such
15878 // that they aren't copied into the new string.
15879 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15880 if (*source_cursor == '\t') {
15881 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15882 if (trimmed_whitespace > common_whitespace) break;
15883 } else {
15884 trimmed_whitespace++;
15885 }
15886
15887 source_cursor++;
15888 dest_length--;
15889 }
15890
15891 memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
15892 string->length = dest_length;
15893}
15894
15899static inline bool
15900heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
15901 if (string_node->unescaped.length == 0) {
15902 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
15903 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
15904 }
15905 return false;
15906}
15907
15911static void
15912parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15913 // The next node should be dedented if it's the first node in the list or if
15914 // it follows a string node.
15915 bool dedent_next = true;
15916
15917 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15918 // keep around two indices: a read and a write.
15919 size_t write_index = 0;
15920
15921 pm_node_t *node;
15922 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15923 // We're not manipulating child nodes that aren't strings. In this case
15924 // we'll skip past it and indicate that the subsequent node should not
15925 // be dedented.
15926 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15927 nodes->nodes[write_index++] = node;
15928 dedent_next = false;
15929 continue;
15930 }
15931
15932 pm_string_node_t *string_node = ((pm_string_node_t *) node);
15933 if (dedent_next) {
15934 parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
15935 }
15936
15937 if (heredoc_dedent_discard_string_node(parser, string_node)) {
15938 } else {
15939 nodes->nodes[write_index++] = node;
15940 }
15941
15942 // We always dedent the next node if it follows a string node.
15943 dedent_next = true;
15944 }
15945
15946 nodes->size = write_index;
15947}
15948
15952static pm_token_t
15953parse_strings_empty_content(const uint8_t *location) {
15954 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15955}
15956
15960static inline pm_node_t *
15961parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
15962 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
15963 bool concating = false;
15964
15965 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15966 pm_node_t *node = NULL;
15967
15968 // Here we have found a string literal. We'll parse it and add it to
15969 // the list of strings.
15970 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
15971 assert(lex_mode->mode == PM_LEX_STRING);
15972 bool lex_interpolation = lex_mode->as.string.interpolation;
15973 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
15974
15975 pm_token_t opening = parser->current;
15976 parser_lex(parser);
15977
15978 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15979 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15980 // If we get here, then we have an end immediately after a
15981 // start. In that case we'll create an empty content token and
15982 // return an uninterpolated string.
15983 pm_token_t content = parse_strings_empty_content(parser->previous.start);
15984 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
15985
15986 pm_string_shared_init(&string->unescaped, content.start, content.end);
15987 node = UP(string);
15988 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15989 // If we get here, then we have an end of a label immediately
15990 // after a start. In that case we'll create an empty symbol
15991 // node.
15992 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
15993 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
15994 node = UP(symbol);
15995
15996 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15997 } else if (!lex_interpolation) {
15998 // If we don't accept interpolation then we expect the string to
15999 // start with a single string content node.
16000 pm_string_t unescaped;
16001 pm_token_t content;
16002
16003 if (match1(parser, PM_TOKEN_EOF)) {
16004 unescaped = PM_STRING_EMPTY;
16005 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
16006 } else {
16007 unescaped = parser->current_string;
16008 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16009 content = parser->previous;
16010 }
16011
16012 // It is unfortunately possible to have multiple string content
16013 // nodes in a row in the case that there's heredoc content in
16014 // the middle of the string, like this cursed example:
16015 //
16016 // <<-END+'b
16017 // a
16018 // END
16019 // c'+'d'
16020 //
16021 // In that case we need to switch to an interpolated string to
16022 // be able to contain all of the parts.
16023 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16024 pm_node_list_t parts = { 0 };
16025 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16026 pm_node_list_append(parser->arena, &parts, part);
16027
16028 do {
16029 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
16030 pm_node_list_append(parser->arena, &parts, part);
16031 parser_lex(parser);
16032 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16033
16034 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16035 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16036 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16037 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16038 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16039 } else if (match1(parser, PM_TOKEN_EOF)) {
16040 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16041 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16042 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16043 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16044 } else {
16045 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16046 parser->previous.start = parser->previous.end;
16047 parser->previous.type = 0;
16048 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16049 }
16050 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16051 // In this case we've hit string content so we know the string
16052 // at least has something in it. We'll need to check if the
16053 // following token is the end (in which case we can return a
16054 // plain string) or if it's not then it has interpolation.
16055 pm_token_t content = parser->current;
16056 pm_string_t unescaped = parser->current_string;
16057 parser_lex(parser);
16058
16059 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16060 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16061 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16062
16063 // Kind of odd behavior, but basically if we have an
16064 // unterminated string and it ends in a newline, we back up one
16065 // character so that the error message is on the last line of
16066 // content in the string.
16067 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16068 const uint8_t *location = parser->previous.end;
16069 if (location > parser->start && location[-1] == '\n') location--;
16070 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
16071
16072 parser->previous.start = parser->previous.end;
16073 parser->previous.type = 0;
16074 }
16075 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16076 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16077 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16078 } else {
16079 // If we get here, then we have interpolation so we'll need
16080 // to create a string or symbol node with interpolation.
16081 pm_node_list_t parts = { 0 };
16082 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
16083 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16084 pm_node_list_append(parser->arena, &parts, part);
16085
16086 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16087 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16088 pm_node_list_append(parser->arena, &parts, part);
16089 }
16090 }
16091
16092 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16093 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16094 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16095 } else if (match1(parser, PM_TOKEN_EOF)) {
16096 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16097 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16098 } else {
16099 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16100 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16101 }
16102 }
16103 } else {
16104 // If we get here, then the first part of the string is not plain
16105 // string content, in which case we need to parse the string as an
16106 // interpolated string.
16107 pm_node_list_t parts = { 0 };
16108 pm_node_t *part;
16109
16110 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16111 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16112 pm_node_list_append(parser->arena, &parts, part);
16113 }
16114 }
16115
16116 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16117 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16118 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16119 } else if (match1(parser, PM_TOKEN_EOF)) {
16120 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16121 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16122 } else {
16123 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16124 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16125 }
16126 }
16127
16128 if (current == NULL) {
16129 // If the node we just parsed is a symbol node, then we can't
16130 // concatenate it with anything else, so we can now return that
16131 // node.
16132 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16133 return node;
16134 }
16135
16136 // If we don't already have a node, then it's fine and we can just
16137 // set the result to be the node we just parsed.
16138 current = node;
16139 } else {
16140 // Otherwise we need to check the type of the node we just parsed.
16141 // If it cannot be concatenated with the previous node, then we'll
16142 // need to add a syntax error.
16143 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16144 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16145 }
16146
16147 // If we haven't already created our container for concatenation,
16148 // we'll do that now.
16149 if (!concating) {
16150 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16151 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16152 }
16153
16154 concating = true;
16155 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16156 pm_interpolated_string_node_append(parser->arena, container, current);
16157 current = UP(container);
16158 }
16159
16160 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, node);
16161 }
16162 }
16163
16164 return current;
16165}
16166
16167#define PM_PARSE_PATTERN_SINGLE 0
16168#define PM_PARSE_PATTERN_TOP 1
16169#define PM_PARSE_PATTERN_MULTI 2
16170
16171static pm_node_t *
16172parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16173
16179static void
16180parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16181 // Skip this capture if it starts with an underscore.
16182 if (peek_at(parser, parser->start + location->start) == '_') return;
16183
16184 if (pm_constant_id_list_includes(captures, capture)) {
16185 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16186 } else {
16187 pm_constant_id_list_append(parser->arena, captures, capture);
16188 }
16189}
16190
16194static pm_node_t *
16195parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16196 // Now, if there are any :: operators that follow, parse them as constant
16197 // path nodes.
16198 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16199 pm_token_t delimiter = parser->previous;
16200 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16201 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16202 }
16203
16204 // If there is a [ or ( that follows, then this is part of a larger pattern
16205 // expression. We'll parse the inner pattern here, then modify the returned
16206 // inner pattern with our constant path attached.
16207 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16208 return node;
16209 }
16210
16211 pm_token_t opening;
16212 pm_token_t closing;
16213 pm_node_t *inner = NULL;
16214
16215 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16216 opening = parser->previous;
16217 accept1(parser, PM_TOKEN_NEWLINE);
16218
16219 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16220 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16221 accept1(parser, PM_TOKEN_NEWLINE);
16222 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16223 }
16224
16225 closing = parser->previous;
16226 } else {
16227 parser_lex(parser);
16228 opening = parser->previous;
16229 accept1(parser, PM_TOKEN_NEWLINE);
16230
16231 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16232 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16233 accept1(parser, PM_TOKEN_NEWLINE);
16234 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16235 }
16236
16237 closing = parser->previous;
16238 }
16239
16240 if (!inner) {
16241 // If there was no inner pattern, then we have something like Foo() or
16242 // Foo[]. In that case we'll create an array pattern with no requireds.
16243 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16244 }
16245
16246 // Now that we have the inner pattern, check to see if it's an array, find,
16247 // or hash pattern. If it is, then we'll attach our constant path to it if
16248 // it doesn't already have a constant. If it's not one of those node types
16249 // or it does have a constant, then we'll create an array pattern.
16250 switch (PM_NODE_TYPE(inner)) {
16251 case PM_ARRAY_PATTERN_NODE: {
16252 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16253
16254 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16255 PM_NODE_START_SET_NODE(pattern_node, node);
16256 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16257
16258 pattern_node->constant = node;
16259 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16260 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16261
16262 return UP(pattern_node);
16263 }
16264
16265 break;
16266 }
16267 case PM_FIND_PATTERN_NODE: {
16268 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16269
16270 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16271 PM_NODE_START_SET_NODE(pattern_node, node);
16272 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16273
16274 pattern_node->constant = node;
16275 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16276 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16277
16278 return UP(pattern_node);
16279 }
16280
16281 break;
16282 }
16283 case PM_HASH_PATTERN_NODE: {
16284 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16285
16286 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16287 PM_NODE_START_SET_NODE(pattern_node, node);
16288 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16289
16290 pattern_node->constant = node;
16291 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16292 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16293
16294 return UP(pattern_node);
16295 }
16296
16297 break;
16298 }
16299 default:
16300 break;
16301 }
16302
16303 // If we got here, then we didn't return one of the inner patterns by
16304 // attaching its constant. In this case we'll create an array pattern and
16305 // attach our constant to it.
16306 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16307 pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
16308 return UP(pattern_node);
16309}
16310
16314static pm_splat_node_t *
16315parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16316 assert(parser->previous.type == PM_TOKEN_USTAR);
16317 pm_token_t operator = parser->previous;
16318 pm_node_t *name = NULL;
16319
16320 // Rest patterns don't necessarily have a name associated with them. So we
16321 // will check for that here. If they do, then we'll add it to the local
16322 // table since this pattern will cause it to become a local variable.
16323 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16324 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16325
16326 int depth;
16327 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16328 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16329 }
16330
16331 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16332 name = UP(pm_local_variable_target_node_create(
16333 parser,
16334 &TOK2LOC(parser, &parser->previous),
16335 constant_id,
16336 (uint32_t) (depth == -1 ? 0 : depth)
16337 ));
16338 }
16339
16340 // Finally we can return the created node.
16341 return pm_splat_node_create(parser, &operator, name);
16342}
16343
16347static pm_node_t *
16348parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16349 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16350 parser_lex(parser);
16351
16352 pm_token_t operator = parser->previous;
16353 pm_node_t *value = NULL;
16354
16355 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16356 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16357 }
16358
16359 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16360 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16361
16362 int depth;
16363 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16364 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16365 }
16366
16367 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16368 value = UP(pm_local_variable_target_node_create(
16369 parser,
16370 &TOK2LOC(parser, &parser->previous),
16371 constant_id,
16372 (uint32_t) (depth == -1 ? 0 : depth)
16373 ));
16374 }
16375
16376 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16377}
16378
16383static bool
16384pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16385 ptrdiff_t length = end - start;
16386 if (length == 0) return false;
16387
16388 // First ensure that it starts with a valid identifier starting character.
16389 size_t width = char_is_identifier_start(parser, start, end - start);
16390 if (width == 0) return false;
16391
16392 // Next, ensure that it's not an uppercase character.
16393 if (parser->encoding_changed) {
16394 if (parser->encoding->isupper_char(start, length)) return false;
16395 } else {
16396 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16397 }
16398
16399 // Next, iterate through all of the bytes of the string to ensure that they
16400 // are all valid identifier characters.
16401 const uint8_t *cursor = start + width;
16402 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16403 return cursor == end;
16404}
16405
16410static pm_node_t *
16411parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16412 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16413 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16414 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16415
16416 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16417 int depth = -1;
16418
16419 if (pm_slice_is_valid_local(parser, start, end)) {
16420 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16421 } else {
16422 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16423
16424 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16425 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16426 }
16427 }
16428
16429 if (depth == -1) {
16430 pm_parser_local_add(parser, constant_id, start, end, 0);
16431 }
16432
16433 parse_pattern_capture(parser, captures, constant_id, value_loc);
16434 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16435 parser,
16436 value_loc,
16437 constant_id,
16438 (uint32_t) (depth == -1 ? 0 : depth)
16439 );
16440
16441 return UP(pm_implicit_node_create(parser, UP(target)));
16442}
16443
16448static void
16449parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16450 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16451 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16452 }
16453}
16454
16459parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16460 pm_node_list_t assocs = { 0 };
16461 pm_static_literals_t keys = { 0 };
16462 pm_node_t *rest = NULL;
16463
16464 switch (PM_NODE_TYPE(first_node)) {
16465 case PM_ASSOC_SPLAT_NODE:
16466 case PM_NO_KEYWORDS_PARAMETER_NODE:
16467 rest = first_node;
16468 break;
16469 case PM_SYMBOL_NODE: {
16470 if (pm_symbol_node_label_p(parser, first_node)) {
16471 parse_pattern_hash_key(parser, &keys, first_node);
16472 pm_node_t *value;
16473
16474 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16475 // Otherwise, we will create an implicit local variable
16476 // target for the value.
16477 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16478 } else {
16479 // Here we have a value for the first assoc in the list, so
16480 // we will parse it now.
16481 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16482 }
16483
16484 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16485 pm_node_list_append(parser->arena, &assocs, assoc);
16486 break;
16487 }
16488 }
16490 default: {
16491 // If we get anything else, then this is an error. For this we'll
16492 // create a missing node for the value and create an assoc node for
16493 // the first node in the list.
16494 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16495 pm_parser_err_node(parser, first_node, diag_id);
16496
16497 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16498 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16499
16500 pm_node_list_append(parser->arena, &assocs, assoc);
16501 break;
16502 }
16503 }
16504
16505 // If there are any other assocs, then we'll parse them now.
16506 while (accept1(parser, PM_TOKEN_COMMA)) {
16507 // Here we need to break to support trailing commas.
16508 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16509 // Trailing commas are not allowed to follow a rest pattern.
16510 if (rest != NULL) {
16511 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16512 }
16513
16514 break;
16515 }
16516
16517 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16518 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16519
16520 if (rest == NULL) {
16521 rest = assoc;
16522 } else {
16523 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16524 pm_node_list_append(parser->arena, &assocs, assoc);
16525 }
16526 } else {
16527 pm_node_t *key;
16528
16529 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16530 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16531
16532 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16533 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16534 } else if (!pm_symbol_node_label_p(parser, key)) {
16535 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16536 }
16537 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16538 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16539 } else {
16540 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16541
16542 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16543 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16544 }
16545
16546 parse_pattern_hash_key(parser, &keys, key);
16547 pm_node_t *value = NULL;
16548
16549 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16550 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16551 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16552 } else {
16553 value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0));
16554 }
16555 } else {
16556 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16557 }
16558
16559 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16560
16561 if (rest != NULL) {
16562 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16563 }
16564
16565 pm_node_list_append(parser->arena, &assocs, assoc);
16566 }
16567 }
16568
16569 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16570 // assocs.nodes is arena-allocated; no explicit free needed.
16571
16572 pm_static_literals_free(&keys);
16573 return node;
16574}
16575
16579static pm_node_t *
16580parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16581 switch (parser->current.type) {
16582 case PM_TOKEN_IDENTIFIER:
16583 case PM_TOKEN_METHOD_NAME: {
16584 parser_lex(parser);
16585 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16586
16587 int depth;
16588 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16589 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16590 }
16591
16592 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16593 return UP(pm_local_variable_target_node_create(
16594 parser,
16595 &TOK2LOC(parser, &parser->previous),
16596 constant_id,
16597 (uint32_t) (depth == -1 ? 0 : depth)
16598 ));
16599 }
16600 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16601 pm_token_t opening = parser->current;
16602 parser_lex(parser);
16603
16604 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16605 // If we have an empty array pattern, then we'll just return a new
16606 // array pattern node.
16607 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16608 }
16609
16610 // Otherwise, we'll parse the inner pattern, then deal with it depending
16611 // on the type it returns.
16612 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16613
16614 accept1(parser, PM_TOKEN_NEWLINE);
16615 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16616 pm_token_t closing = parser->previous;
16617
16618 switch (PM_NODE_TYPE(inner)) {
16619 case PM_ARRAY_PATTERN_NODE: {
16620 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16621 if (pattern_node->opening_loc.length == 0) {
16622 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16623 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16624
16625 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16626 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16627
16628 return UP(pattern_node);
16629 }
16630
16631 break;
16632 }
16633 case PM_FIND_PATTERN_NODE: {
16634 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16635 if (pattern_node->opening_loc.length == 0) {
16636 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16637 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16638
16639 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16640 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16641
16642 return UP(pattern_node);
16643 }
16644
16645 break;
16646 }
16647 default:
16648 break;
16649 }
16650
16651 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16652 pm_array_pattern_node_requireds_append(parser->arena, node, inner);
16653 return UP(node);
16654 }
16655 case PM_TOKEN_BRACE_LEFT: {
16656 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16657 parser->pattern_matching_newlines = false;
16658
16660 pm_token_t opening = parser->current;
16661 parser_lex(parser);
16662
16663 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16664 // If we have an empty hash pattern, then we'll just return a new hash
16665 // pattern node.
16666 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16667 } else {
16668 pm_node_t *first_node;
16669
16670 switch (parser->current.type) {
16671 case PM_TOKEN_LABEL:
16672 parser_lex(parser);
16673 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16674 break;
16675 case PM_TOKEN_USTAR_STAR:
16676 first_node = parse_pattern_keyword_rest(parser, captures);
16677 break;
16678 case PM_TOKEN_STRING_BEGIN:
16679 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16680 break;
16681 default: {
16682 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16683 parser_lex(parser);
16684
16685 first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
16686 break;
16687 }
16688 }
16689
16690 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16691
16692 accept1(parser, PM_TOKEN_NEWLINE);
16693 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16694 pm_token_t closing = parser->previous;
16695
16696 PM_NODE_START_SET_TOKEN(parser, node, &opening);
16697 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
16698
16699 node->opening_loc = TOK2LOC(parser, &opening);
16700 node->closing_loc = TOK2LOC(parser, &closing);
16701 }
16702
16703 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16704 return UP(node);
16705 }
16706 case PM_TOKEN_UDOT_DOT:
16707 case PM_TOKEN_UDOT_DOT_DOT: {
16708 pm_token_t operator = parser->current;
16709 parser_lex(parser);
16710
16711 // Since we have a unary range operator, we need to parse the subsequent
16712 // expression as the right side of the range.
16713 switch (parser->current.type) {
16714 case PM_CASE_PRIMITIVE: {
16715 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16716 return UP(pm_range_node_create(parser, NULL, &operator, right));
16717 }
16718 default: {
16719 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16720 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16721 return UP(pm_range_node_create(parser, NULL, &operator, right));
16722 }
16723 }
16724 }
16725 case PM_CASE_PRIMITIVE: {
16726 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16727
16728 // If we found a label, we need to immediately return to the caller.
16729 if (pm_symbol_node_label_p(parser, node)) return node;
16730
16731 // Call nodes (arithmetic operations) are not allowed in patterns
16732 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16733 pm_parser_err_node(parser, node, diag_id);
16734 pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node));
16735
16736 pm_node_unreference(parser, node);
16737 return UP(missing_node);
16738 }
16739
16740 // Now that we have a primitive, we need to check if it's part of a range.
16741 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16742 pm_token_t operator = parser->previous;
16743
16744 // Now that we have the operator, we need to check if this is followed
16745 // by another expression. If it is, then we will create a full range
16746 // node. Otherwise, we'll create an endless range.
16747 switch (parser->current.type) {
16748 case PM_CASE_PRIMITIVE: {
16749 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16750 return UP(pm_range_node_create(parser, node, &operator, right));
16751 }
16752 default:
16753 return UP(pm_range_node_create(parser, node, &operator, NULL));
16754 }
16755 }
16756
16757 return node;
16758 }
16759 case PM_TOKEN_CARET: {
16760 parser_lex(parser);
16761 pm_token_t operator = parser->previous;
16762
16763 // At this point we have a pin operator. We need to check the subsequent
16764 // expression to determine if it's a variable or an expression.
16765 switch (parser->current.type) {
16766 case PM_TOKEN_IDENTIFIER: {
16767 parser_lex(parser);
16768 pm_node_t *variable = UP(parse_variable(parser));
16769
16770 if (variable == NULL) {
16771 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16772 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16773 }
16774
16775 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16776 }
16777 case PM_TOKEN_INSTANCE_VARIABLE: {
16778 parser_lex(parser);
16779 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16780
16781 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16782 }
16783 case PM_TOKEN_CLASS_VARIABLE: {
16784 parser_lex(parser);
16785 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16786
16787 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16788 }
16789 case PM_TOKEN_GLOBAL_VARIABLE: {
16790 parser_lex(parser);
16791 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16792
16793 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16794 }
16795 case PM_TOKEN_NUMBERED_REFERENCE: {
16796 parser_lex(parser);
16797 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16798
16799 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16800 }
16801 case PM_TOKEN_BACK_REFERENCE: {
16802 parser_lex(parser);
16803 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16804
16805 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16806 }
16807 case PM_TOKEN_PARENTHESIS_LEFT: {
16808 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16809 parser->pattern_matching_newlines = false;
16810
16811 pm_token_t lparen = parser->current;
16812 parser_lex(parser);
16813
16814 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16815 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16816
16817 accept1(parser, PM_TOKEN_NEWLINE);
16818 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16819 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16820 }
16821 default: {
16822 // If we get here, then we have a pin operator followed by something
16823 // not understood. We'll create a missing node and return that.
16824 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16825 pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16826 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16827 }
16828 }
16829 }
16830 case PM_TOKEN_UCOLON_COLON: {
16831 pm_token_t delimiter = parser->current;
16832 parser_lex(parser);
16833
16834 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16835 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16836
16837 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16838 }
16839 case PM_TOKEN_CONSTANT: {
16840 pm_token_t constant = parser->current;
16841 parser_lex(parser);
16842
16843 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16844 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16845 }
16846 default:
16847 pm_parser_err_current(parser, diag_id);
16848 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16849 }
16850}
16851
16852static bool
16853parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16854 switch (PM_NODE_TYPE(node)) {
16855 case PM_LOCAL_VARIABLE_TARGET_NODE: {
16856 pm_parser_t *parser = (pm_parser_t *) data;
16857 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16858 return false;
16859 }
16860 default:
16861 return true;
16862 }
16863}
16864
16869static void
16870parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16871 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16872}
16873
16878static pm_node_t *
16879parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16880 pm_node_t *node = first_node;
16881 bool alternation = false;
16882
16883 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16884 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16885 parse_pattern_alternation_error(parser, node);
16886 }
16887
16888 switch (parser->current.type) {
16889 case PM_TOKEN_IDENTIFIER:
16890 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16891 case PM_TOKEN_BRACE_LEFT:
16892 case PM_TOKEN_CARET:
16893 case PM_TOKEN_CONSTANT:
16894 case PM_TOKEN_UCOLON_COLON:
16895 case PM_TOKEN_UDOT_DOT:
16896 case PM_TOKEN_UDOT_DOT_DOT:
16897 case PM_CASE_PRIMITIVE: {
16898 if (!alternation) {
16899 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16900 } else {
16901 pm_token_t operator = parser->previous;
16902 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16903
16904 if (captures->size) parse_pattern_alternation_error(parser, right);
16905 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16906 }
16907
16908 break;
16909 }
16910 case PM_TOKEN_PARENTHESIS_LEFT:
16911 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16912 pm_token_t operator = parser->previous;
16913 pm_token_t opening = parser->current;
16914 parser_lex(parser);
16915
16916 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16917 accept1(parser, PM_TOKEN_NEWLINE);
16918 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16919 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16920
16921 if (!alternation) {
16922 node = right;
16923 } else {
16924 if (captures->size) parse_pattern_alternation_error(parser, right);
16925 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16926 }
16927
16928 break;
16929 }
16930 default: {
16931 pm_parser_err_current(parser, diag_id);
16932 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16933
16934 if (!alternation) {
16935 node = right;
16936 } else {
16937 if (captures->size) parse_pattern_alternation_error(parser, right);
16938 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
16939 }
16940
16941 break;
16942 }
16943 }
16944 }
16945
16946 // If we have an =>, then we are assigning this pattern to a variable.
16947 // In this case we should create an assignment node.
16948 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16949 pm_token_t operator = parser->previous;
16950 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16951
16952 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16953 int depth;
16954
16955 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16956 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16957 }
16958
16959 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16960 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16961 parser,
16962 &TOK2LOC(parser, &parser->previous),
16963 constant_id,
16964 (uint32_t) (depth == -1 ? 0 : depth)
16965 );
16966
16967 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
16968 }
16969
16970 return node;
16971}
16972
16976static pm_node_t *
16977parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
16978 pm_node_t *node = NULL;
16979
16980 bool leading_rest = false;
16981 bool trailing_rest = false;
16982
16983 switch (parser->current.type) {
16984 case PM_TOKEN_LABEL: {
16985 parser_lex(parser);
16986 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16987 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16988
16989 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16990 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16991 }
16992
16993 return node;
16994 }
16995 case PM_TOKEN_USTAR_STAR: {
16996 node = parse_pattern_keyword_rest(parser, captures);
16997 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16998
16999 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17000 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17001 }
17002
17003 return node;
17004 }
17005 case PM_TOKEN_STRING_BEGIN: {
17006 // We need special handling for string beginnings because they could
17007 // be dynamic symbols leading to hash patterns.
17008 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17009
17010 if (pm_symbol_node_label_p(parser, node)) {
17011 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17012
17013 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17014 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17015 }
17016
17017 return node;
17018 }
17019
17020 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17021 break;
17022 }
17023 case PM_TOKEN_USTAR: {
17024 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17025 parser_lex(parser);
17026 node = UP(parse_pattern_rest(parser, captures));
17027 leading_rest = true;
17028 break;
17029 }
17030 }
17032 default:
17033 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17034 break;
17035 }
17036
17037 // If we got a dynamic label symbol, then we need to treat it like the
17038 // beginning of a hash pattern.
17039 if (pm_symbol_node_label_p(parser, node)) {
17040 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17041 }
17042
17043 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17044 // If we have a comma, then we are now parsing either an array pattern
17045 // or a find pattern. We need to parse all of the patterns, put them
17046 // into a big list, and then determine which type of node we have.
17047 pm_node_list_t nodes = { 0 };
17048 pm_node_list_append(parser->arena, &nodes, node);
17049
17050 // Gather up all of the patterns into the list.
17051 while (accept1(parser, PM_TOKEN_COMMA)) {
17052 // Break early here in case we have a trailing comma.
17053 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17054 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17055 pm_node_list_append(parser->arena, &nodes, node);
17056 trailing_rest = true;
17057 break;
17058 }
17059
17060 if (accept1(parser, PM_TOKEN_USTAR)) {
17061 node = UP(parse_pattern_rest(parser, captures));
17062
17063 // If we have already parsed a splat pattern, then this is an
17064 // error. We will continue to parse the rest of the patterns,
17065 // but we will indicate it as an error.
17066 if (trailing_rest) {
17067 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17068 }
17069
17070 trailing_rest = true;
17071 } else {
17072 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17073 }
17074
17075 pm_node_list_append(parser->arena, &nodes, node);
17076 }
17077
17078 // If the first pattern and the last pattern are rest patterns, then we
17079 // will call this a find pattern, regardless of how many rest patterns
17080 // are in between because we know we already added the appropriate
17081 // errors. Otherwise we will create an array pattern.
17082 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17083 node = UP(pm_find_pattern_node_create(parser, &nodes));
17084
17085 if (nodes.size == 2) {
17086 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17087 }
17088 } else {
17089 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17090
17091 if (leading_rest && trailing_rest) {
17092 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17093 }
17094 }
17095
17096 // nodes.nodes is arena-allocated; no explicit free needed.
17097 } else if (leading_rest) {
17098 // Otherwise, if we parsed a single splat pattern, then we know we have
17099 // an array pattern, so we can go ahead and create that node.
17100 node = UP(pm_array_pattern_node_rest_create(parser, node));
17101 }
17102
17103 return node;
17104}
17105
17111static inline void
17112parse_negative_numeric(pm_node_t *node) {
17113 switch (PM_NODE_TYPE(node)) {
17114 case PM_INTEGER_NODE: {
17115 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17116 cast->base.location.start--;
17117 cast->base.location.length++;
17118 cast->value.negative = true;
17119 break;
17120 }
17121 case PM_FLOAT_NODE: {
17122 pm_float_node_t *cast = (pm_float_node_t *) node;
17123 cast->base.location.start--;
17124 cast->base.location.length++;
17125 cast->value = -cast->value;
17126 break;
17127 }
17128 case PM_RATIONAL_NODE: {
17129 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17130 cast->base.location.start--;
17131 cast->base.location.length++;
17132 cast->numerator.negative = true;
17133 break;
17134 }
17135 case PM_IMAGINARY_NODE:
17136 node->location.start--;
17137 node->location.length++;
17138 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17139 break;
17140 default:
17141 assert(false && "unreachable");
17142 break;
17143 }
17144}
17145
17151static void
17152pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17153 switch (diag_id) {
17154 case PM_ERR_HASH_KEY: {
17155 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17156 break;
17157 }
17158 case PM_ERR_HASH_VALUE:
17159 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17160 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17161 break;
17162 }
17163 case PM_ERR_UNARY_RECEIVER: {
17164 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17165 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17166 break;
17167 }
17168 case PM_ERR_UNARY_DISALLOWED:
17169 case PM_ERR_EXPECT_ARGUMENT: {
17170 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17171 break;
17172 }
17173 default:
17174 pm_parser_err_previous(parser, diag_id);
17175 break;
17176 }
17177}
17178
17182static void
17183parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17184#define CONTEXT_NONE 0
17185#define CONTEXT_THROUGH_ENSURE 1
17186#define CONTEXT_THROUGH_ELSE 2
17187
17188 pm_context_node_t *context_node = parser->current_context;
17189 int context = CONTEXT_NONE;
17190
17191 while (context_node != NULL) {
17192 switch (context_node->context) {
17200 case PM_CONTEXT_DEFINED:
17202 // These are the good cases. We're allowed to have a retry here.
17203 return;
17204 case PM_CONTEXT_CLASS:
17205 case PM_CONTEXT_DEF:
17207 case PM_CONTEXT_MAIN:
17208 case PM_CONTEXT_MODULE:
17209 case PM_CONTEXT_PREEXE:
17210 case PM_CONTEXT_SCLASS:
17211 // These are the bad cases. We're not allowed to have a retry in
17212 // these contexts.
17213 if (context == CONTEXT_NONE) {
17214 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17215 } else if (context == CONTEXT_THROUGH_ENSURE) {
17216 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17217 } else if (context == CONTEXT_THROUGH_ELSE) {
17218 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17219 }
17220 return;
17228 // These are also bad cases, but with a more specific error
17229 // message indicating the else.
17230 context = CONTEXT_THROUGH_ELSE;
17231 break;
17239 // These are also bad cases, but with a more specific error
17240 // message indicating the ensure.
17241 context = CONTEXT_THROUGH_ENSURE;
17242 break;
17243 case PM_CONTEXT_NONE:
17244 // This case should never happen.
17245 assert(false && "unreachable");
17246 break;
17247 case PM_CONTEXT_BEGIN:
17251 case PM_CONTEXT_CASE_IN:
17254 case PM_CONTEXT_ELSE:
17255 case PM_CONTEXT_ELSIF:
17256 case PM_CONTEXT_EMBEXPR:
17258 case PM_CONTEXT_FOR:
17259 case PM_CONTEXT_IF:
17264 case PM_CONTEXT_PARENS:
17265 case PM_CONTEXT_POSTEXE:
17267 case PM_CONTEXT_TERNARY:
17268 case PM_CONTEXT_UNLESS:
17269 case PM_CONTEXT_UNTIL:
17270 case PM_CONTEXT_WHILE:
17271 // In these contexts we should continue walking up the list of
17272 // contexts.
17273 break;
17274 }
17275
17276 context_node = context_node->prev;
17277 }
17278
17279#undef CONTEXT_NONE
17280#undef CONTEXT_ENSURE
17281#undef CONTEXT_ELSE
17282}
17283
/**
 * Check that a yield is allowed where it appears and, if it is not, add
 * PM_ERR_INVALID_YIELD to the parser on the given node.
 *
 * The context stack is walked outward from parser->current_context through
 * each ->prev link until a context that permits or forbids yield is found. If
 * the stack is exhausted first, nothing is reported.
 *
 * NOTE(review): the case lists below look shorter than the set of contexts
 * handled by the switch elsewhere in this file; confirm the full label lists
 * against the complete source.
 *
 * @param parser The parser whose context stack is inspected and that receives
 *     any error.
 * @param node The node the error is attached to.
 */
17287static void
17288parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17289 pm_context_node_t *context_node = parser->current_context;
17290
17291 while (context_node != NULL) {
17292 switch (context_node->context) {
17293 case PM_CONTEXT_DEF:
17295 case PM_CONTEXT_DEFINED:
17299 // These are the good cases. We're allowed to have a yield in
17300 // these contexts, so stop walking without reporting anything.
17301 return;
17302 case PM_CONTEXT_CLASS:
17306 case PM_CONTEXT_MAIN:
17307 case PM_CONTEXT_MODULE:
17311 case PM_CONTEXT_SCLASS:
17315 // These are the bad cases. We're not allowed to have a yield in
17316 // these contexts.
17317 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17318 return;
17319 case PM_CONTEXT_NONE:
17320 // This case should never happen.
17321 assert(false && "unreachable");
17322 break;
17323 case PM_CONTEXT_BEGIN:
17333 case PM_CONTEXT_CASE_IN:
17336 case PM_CONTEXT_ELSE:
17337 case PM_CONTEXT_ELSIF:
17338 case PM_CONTEXT_EMBEXPR:
17340 case PM_CONTEXT_FOR:
17341 case PM_CONTEXT_IF:
17349 case PM_CONTEXT_PARENS:
17350 case PM_CONTEXT_POSTEXE:
17352 case PM_CONTEXT_PREEXE:
17354 case PM_CONTEXT_TERNARY:
17355 case PM_CONTEXT_UNLESS:
17356 case PM_CONTEXT_UNTIL:
17357 case PM_CONTEXT_WHILE:
17358 // In these contexts we should continue walking up the list of
17359 // contexts.
17360 break;
17361 }
17362
17363 context_node = context_node->prev;
17364 }
17365}
17366
// NOTE(review): only the start/end members of this struct are visible in this
// view; the remaining members and the closing line that names the type are
// not shown. Presumably this is the callback data used when reporting regular
// expression errors (that code reads ->parser, ->start, ->end and ->shared) —
// confirm against the full source.
17371typedef struct {
17374
// Pointer to the beginning of a byte range in the source.
17376 const uint8_t *start;
17377
// Pointer to the end of that byte range.
17379 const uint8_t *end;
17380
17389
/**
 * Error callback for regular expression parsing: adds a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic, formatted with the given message, to
 * the parser stored in the callback data.
 *
 * When the callback data is marked as shared, the error is placed on the
 * [start, end) range passed in by the caller; otherwise it is placed on the
 * start/end range stored in the callback data.
 *
 * NOTE(review): the declaration of callback_data is not visible in this view;
 * presumably it is obtained from the data parameter — confirm.
 *
 * @param start Beginning of the range the error applies to.
 * @param end End of the range the error applies to.
 * @param message The message to format into the diagnostic.
 * @param data Opaque callback data supplied when the callback was registered.
 */
17394static void
17395parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17397 pm_token_t location;
17398
// Build a throwaway token purely to carry the byte range for the diagnostic.
17399 if (callback_data->shared) {
17400 location = (pm_token_t) { .type = 0, .start = start, .end = end };
17401 } else {
17402 location = (pm_token_t) { .type = 0, .start = callback_data->start, .end = callback_data->end };
17403 }
17404
17405 PM_PARSER_ERR_FORMAT(callback_data->parser, PM_TOKEN_START(callback_data->parser, &location), PM_TOKEN_LENGTH(&location), PM_ERR_REGEXP_PARSE_ERROR, message);
17406}
17407
/**
 * Run the regular expression parser over the unescaped content of a regular
 * expression node so that any errors it finds are added to the parser through
 * parse_regular_expression_error.
 *
 * @param parser The parser that receives any errors.
 * @param node The regular expression node whose unescaped source is checked.
 */
17411static void
17412parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17413 const pm_string_t *unescaped = &node->unescaped;
// NOTE(review): the line that opens this initializer is not visible in this
// view; the fields below populate the error_data object passed to
// pm_regexp_parse at the end of the function.
17415 .parser = parser,
// Byte range of the whole node, used by the error callback when the
// unescaped string is not shared.
17416 .start = parser->start + PM_NODE_START(node),
17417 .end = parser->start + PM_NODE_END(node),
// Presumably a shared string still points into the original source, which is
// why the callback can use the parser-reported range directly — confirm.
17418 .shared = unescaped->type == PM_STRING_SHARED
17419 };
17420
// Only the error callback is supplied; the two other callback arguments are
// NULL.
17421 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17422}
17423
17427static inline pm_node_t *
17428parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17429 switch (parser->current.type) {
17430 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17431 parser_lex(parser);
17432
17433 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17434 pm_accepts_block_stack_push(parser, true);
17435 bool parsed_bare_hash = false;
17436
17437 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17438 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17439
17440 // Handle the case where we don't have a comma and we have a
17441 // newline followed by a right bracket.
17442 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17443 break;
17444 }
17445
17446 // Ensure that we have a comma between elements in the array.
17447 if (array->elements.size > 0) {
17448 if (accept1(parser, PM_TOKEN_COMMA)) {
17449 // If there was a comma but we also accepted a newline,
17450 // then this is a syntax error.
17451 if (accepted_newline) {
17452 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17453 }
17454 } else {
17455 // If there was no comma, then we need to add a syntax
17456 // error.
17457 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17458 parser->previous.start = parser->previous.end;
17459 parser->previous.type = 0;
17460 }
17461 }
17462
17463 // If we have a right bracket immediately following a comma,
17464 // this is allowed since it's a trailing comma. In this case we
17465 // can break out of the loop.
17466 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17467
17468 pm_node_t *element;
17469
17470 if (accept1(parser, PM_TOKEN_USTAR)) {
17471 pm_token_t operator = parser->previous;
17472 pm_node_t *expression = NULL;
17473
17474 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17475 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17476 } else {
17477 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17478 }
17479
17480 element = UP(pm_splat_node_create(parser, &operator, expression));
17481 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17482 if (parsed_bare_hash) {
17483 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17484 }
17485
17486 element = UP(pm_keyword_hash_node_create(parser));
17487 pm_static_literals_t hash_keys = { 0 };
17488
17489 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17490 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17491 }
17492
17493 pm_static_literals_free(&hash_keys);
17494 parsed_bare_hash = true;
17495 } else {
17496 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17497
17498 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17499 if (parsed_bare_hash) {
17500 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17501 }
17502
17503 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17504 pm_static_literals_t hash_keys = { 0 };
17505 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17506
17507 pm_token_t operator = { 0 };
17508 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17509 operator = parser->previous;
17510 }
17511
17512 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17513 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
17514 pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
17515
17516 element = UP(hash);
17517 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17518 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17519 }
17520
17521 pm_static_literals_free(&hash_keys);
17522 parsed_bare_hash = true;
17523 }
17524 }
17525
17526 pm_array_node_elements_append(parser->arena, array, element);
17527 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17528 }
17529
17530 accept1(parser, PM_TOKEN_NEWLINE);
17531
17532 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17533 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17534 parser->previous.start = parser->previous.end;
17535 parser->previous.type = 0;
17536 }
17537
17538 pm_array_node_close_set(parser, array, &parser->previous);
17539 pm_accepts_block_stack_pop(parser);
17540
17541 return UP(array);
17542 }
17543 case PM_TOKEN_PARENTHESIS_LEFT:
17544 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17545 pm_token_t opening = parser->current;
17546 pm_node_flags_t flags = 0;
17547
17548 pm_node_list_t current_block_exits = { 0 };
17549 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17550
17551 parser_lex(parser);
17552 while (true) {
17553 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17554 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17555 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17556 break;
17557 }
17558 }
17559
17560 // If this is the end of the file or we match a right parenthesis, then
17561 // we have an empty parentheses node, and we can immediately return.
17562 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17563 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17564 pop_block_exits(parser, previous_block_exits);
17565 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17566 }
17567
17568 // Otherwise, we're going to parse the first statement in the list
17569 // of statements within the parentheses.
17570 pm_accepts_block_stack_push(parser, true);
17571 context_push(parser, PM_CONTEXT_PARENS);
17572 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17573 context_pop(parser);
17574
17575 // Determine if this statement is followed by a terminator. In the
17576 // case of a single statement, this is fine. But in the case of
17577 // multiple statements it's required.
17578 bool terminator_found = false;
17579
17580 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17581 terminator_found = true;
17582 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17583 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17584 terminator_found = true;
17585 }
17586
17587 if (terminator_found) {
17588 while (true) {
17589 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17590 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17591 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17592 break;
17593 }
17594 }
17595 }
17596
17597 // If we hit a right parenthesis, then we're done parsing the
17598 // parentheses node, and we can check which kind of node we should
17599 // return.
17600 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17601 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17602 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17603 }
17604
17605 parser_lex(parser);
17606 pm_accepts_block_stack_pop(parser);
17607 pop_block_exits(parser, previous_block_exits);
17608
17609 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17610 // If we have a single statement and are ending on a right
17611 // parenthesis, then we need to check if this is possibly a
17612 // multiple target node.
17613 pm_multi_target_node_t *multi_target;
17614
17615 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
17616 multi_target = (pm_multi_target_node_t *) statement;
17617 } else {
17618 multi_target = pm_multi_target_node_create(parser);
17619 pm_multi_target_node_targets_append(parser, multi_target, statement);
17620 }
17621
17622 multi_target->lparen_loc = TOK2LOC(parser, &opening);
17623 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
17624 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
17625 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
17626
17627 pm_node_t *result;
17628 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17629 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17630 accept1(parser, PM_TOKEN_NEWLINE);
17631 } else {
17632 result = UP(multi_target);
17633 }
17634
17635 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17636 // All set, this is explicitly allowed by the parent
17637 // context.
17638 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17639 // All set, we're inside a for loop and we're parsing
17640 // multiple targets.
17641 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17642 // Multi targets are not allowed when it's not a
17643 // statement level.
17644 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17645 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17646 // Multi targets must be followed by an equal sign in
17647 // order to be valid (or a right parenthesis if they are
17648 // nested).
17649 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17650 }
17651
17652 return result;
17653 }
17654
17655 // If we have a single statement and are ending on a right parenthesis
17656 // and we didn't return a multiple assignment node, then we can return a
17657 // regular parentheses node now.
17658 pm_statements_node_t *statements = pm_statements_node_create(parser);
17659 pm_statements_node_body_append(parser, statements, statement, true);
17660
17661 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17662 }
17663
17664 // If we have more than one statement in the set of parentheses,
17665 // then we are going to parse all of them as a list of statements.
17666 // We'll do that here.
17667 context_push(parser, PM_CONTEXT_PARENS);
17668 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17669
17670 pm_statements_node_t *statements = pm_statements_node_create(parser);
17671 pm_statements_node_body_append(parser, statements, statement, true);
17672
17673 // If we didn't find a terminator and we didn't find a right
17674 // parenthesis, then this is a syntax error.
17675 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17676 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17677 }
17678
17679 // Parse each statement within the parentheses.
17680 while (true) {
17681 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17682 pm_statements_node_body_append(parser, statements, node, true);
17683
17684 // If we're recovering from a syntax error, then we need to stop
17685 // parsing the statements now.
17686 if (parser->recovering) {
17687 // If this is the level of context where the recovery has
17688 // happened, then we can mark the parser as done recovering.
17689 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17690 break;
17691 }
17692
17693 // If we couldn't parse an expression at all, then we need to
17694 // bail out of the loop.
17695 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17696
17697 // If we successfully parsed a statement, then we are going to
17698 // need a terminator to delimit them.
17699 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17700 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17701 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17702 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17703 break;
17704 } else if (!match1(parser, PM_TOKEN_EOF)) {
17705 // If we're at the end of the file, then we're going to add
17706 // an error after this for the ) anyway.
17707 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17708 }
17709 }
17710
17711 context_pop(parser);
17712 pm_accepts_block_stack_pop(parser);
17713 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17714
17715 // When we're parsing multi targets, we allow them to be followed by
17716 // a right parenthesis if they are at the statement level. This is
17717 // only possible if they are the final statement in a parentheses.
17718 // We need to explicitly reject that here.
17719 {
17720 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17721
17722 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17723 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17724 pm_multi_target_node_targets_append(parser, multi_target, statement);
17725
17726 statement = UP(multi_target);
17727 statements->body.nodes[statements->body.size - 1] = statement;
17728 }
17729
17730 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17731 const uint8_t *offset = parser->start + PM_NODE_END(statement);
17732 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17733 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0));
17734
17735 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17736 statements->body.nodes[statements->body.size - 1] = statement;
17737
17738 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17739 }
17740 }
17741
17742 pop_block_exits(parser, previous_block_exits);
17743 pm_void_statements_check(parser, statements, true);
17744 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17745 }
17746 case PM_TOKEN_BRACE_LEFT: {
17747 // If we were passed a current_hash_keys via the parser, then that
17748 // means we're already parsing a hash and we want to share the set
17749 // of hash keys with this inner hash we're about to parse for the
17750 // sake of warnings. We'll set it to NULL after we grab it to make
17751 // sure subsequent expressions don't use it. Effectively this is a
17752 // way of getting around passing it to every call to
17753 // parse_expression.
17754 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17755 parser->current_hash_keys = NULL;
17756
17757 pm_accepts_block_stack_push(parser, true);
17758 parser_lex(parser);
17759
17760 pm_token_t opening = parser->previous;
17761 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
17762
17763 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17764 if (current_hash_keys != NULL) {
17765 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17766 } else {
17767 pm_static_literals_t hash_keys = { 0 };
17768 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17769 pm_static_literals_free(&hash_keys);
17770 }
17771
17772 accept1(parser, PM_TOKEN_NEWLINE);
17773 }
17774
17775 pm_accepts_block_stack_pop(parser);
17776 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17777 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
17778
17779 return UP(node);
17780 }
17781 case PM_TOKEN_CHARACTER_LITERAL: {
17782 pm_node_t *node = UP(pm_string_node_create_current_string(
17783 parser,
17784 &(pm_token_t) {
17785 .type = PM_TOKEN_STRING_BEGIN,
17786 .start = parser->current.start,
17787 .end = parser->current.start + 1
17788 },
17789 &(pm_token_t) {
17790 .type = PM_TOKEN_STRING_CONTENT,
17791 .start = parser->current.start + 1,
17792 .end = parser->current.end
17793 },
17794 NULL
17795 ));
17796
17797 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17798
17799 // Skip past the character literal here, since now we have handled
17800 // parser->explicit_encoding correctly.
17801 parser_lex(parser);
17802
17803 // Characters can be followed by strings in which case they are
17804 // automatically concatenated.
17805 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17806 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17807 }
17808
17809 return node;
17810 }
17811 case PM_TOKEN_CLASS_VARIABLE: {
17812 parser_lex(parser);
17813 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17814
17815 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17816 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17817 }
17818
17819 return node;
17820 }
17821 case PM_TOKEN_CONSTANT: {
17822 parser_lex(parser);
17823 pm_token_t constant = parser->previous;
17824
17825 // If a constant is immediately followed by parentheses, then this is in
17826 // fact a method call, not a constant read.
17827 if (
17828 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17829 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17830 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17831 match1(parser, PM_TOKEN_BRACE_LEFT)
17832 ) {
17833 pm_arguments_t arguments = { 0 };
17834 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17835 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17836 }
17837
17838 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17839
17840 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17841 // If we get here, then we have a comma immediately following a
17842 // constant, so we're going to parse this as a multiple assignment.
17843 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17844 }
17845
17846 return node;
17847 }
17848 case PM_TOKEN_UCOLON_COLON: {
17849 parser_lex(parser);
17850 pm_token_t delimiter = parser->previous;
17851
17852 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17853 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17854
17855 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17856 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17857 }
17858
17859 return node;
17860 }
17861 case PM_TOKEN_UDOT_DOT:
17862 case PM_TOKEN_UDOT_DOT_DOT: {
17863 pm_token_t operator = parser->current;
17864 parser_lex(parser);
17865
17866 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17867
17868 // Unary .. and ... are special because these are non-associative
17869 // operators that can also be unary operators. In this case we need
17870 // to explicitly reject code that has a .. or ... that follows this
17871 // expression.
17872 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17873 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17874 }
17875
17876 return UP(pm_range_node_create(parser, NULL, &operator, right));
17877 }
17878 case PM_TOKEN_FLOAT:
17879 parser_lex(parser);
17880 return UP(pm_float_node_create(parser, &parser->previous));
17881 case PM_TOKEN_FLOAT_IMAGINARY:
17882 parser_lex(parser);
17883 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17884 case PM_TOKEN_FLOAT_RATIONAL:
17885 parser_lex(parser);
17886 return UP(pm_float_node_rational_create(parser, &parser->previous));
17887 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17888 parser_lex(parser);
17889 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17890 case PM_TOKEN_NUMBERED_REFERENCE: {
17891 parser_lex(parser);
17892 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17893
17894 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17895 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17896 }
17897
17898 return node;
17899 }
17900 case PM_TOKEN_GLOBAL_VARIABLE: {
17901 parser_lex(parser);
17902 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17903
17904 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17905 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17906 }
17907
17908 return node;
17909 }
17910 case PM_TOKEN_BACK_REFERENCE: {
17911 parser_lex(parser);
17912 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17913
17914 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17915 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17916 }
17917
17918 return node;
17919 }
17920 case PM_TOKEN_IDENTIFIER:
17921 case PM_TOKEN_METHOD_NAME: {
17922 parser_lex(parser);
17923 pm_token_t identifier = parser->previous;
17924 pm_node_t *node = parse_variable_call(parser);
17925
17926 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17927 // If parse_variable_call returned with a call node, then we
17928 // know the identifier is not in the local table. In that case
17929 // we need to check if there are arguments following the
17930 // identifier.
17931 pm_call_node_t *call = (pm_call_node_t *) node;
17932 pm_arguments_t arguments = { 0 };
17933
17934 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
17935 // Since we found arguments, we need to turn off the
17936 // variable call bit in the flags.
17937 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17938
17939 call->opening_loc = arguments.opening_loc;
17940 call->arguments = arguments.arguments;
17941 call->closing_loc = arguments.closing_loc;
17942 call->block = arguments.block;
17943
17944 const pm_location_t *end = pm_arguments_end(&arguments);
17945 if (end == NULL) {
17946 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
17947 } else {
17948 PM_NODE_LENGTH_SET_LOCATION(call, end);
17949 }
17950 }
17951 } else {
17952 // Otherwise, we know the identifier is in the local table. This
17953 // can still be a method call if it is followed by arguments or
17954 // a block, so we need to check for that here.
17955 if (
17956 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17957 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17958 match1(parser, PM_TOKEN_BRACE_LEFT)
17959 ) {
17960 pm_arguments_t arguments = { 0 };
17961 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17962 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17963
17964 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17965 // If we're about to convert an 'it' implicit local
17966 // variable read into a method call, we need to remove
17967 // it from the list of implicit local variables.
17968 pm_node_unreference(parser, node);
17969 } else {
17970 // Otherwise, we're about to convert a regular local
17971 // variable read into a method call, in which case we
17972 // need to indicate that this was not a read for the
17973 // purposes of warnings.
17974 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17975
17976 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
17977 pm_node_unreference(parser, node);
17978 } else {
17980 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
17981 }
17982 }
17983
17984 return UP(fcall);
17985 }
17986 }
17987
17988 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17989 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17990 }
17991
17992 return node;
17993 }
// Heredoc literal. Depending on the body this arm yields one of:
//   - an empty string/xstring node (terminator or EOF comes immediately),
//   - a missing node (the first part could not be parsed),
//   - a single string/xstring node (one plain string part, then the end),
//   - an interpolated string/xstring node (anything else).
// If a STRING_BEGIN token follows, the result is handed to parse_strings.
17994 case PM_TOKEN_HEREDOC_START: {
17995 // Here we have found a heredoc. We'll parse it and add it to the
17996 // list of strings.
17997 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
// Copy the heredoc lex mode by value: its identifier, quote and indent
// fields are read again below, after more tokens have been lexed.
17998 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
17999
// (size_t) -1 is the "not set" sentinel that the dedent checks below test
// for. The lex mode is given a pointer to this local.
// NOTE(review): presumably the lexer writes the minimum indentation through
// that pointer while lexing the body — confirm.
18000 size_t common_whitespace = (size_t) -1;
18001 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18002
18003 parser_lex(parser);
18004 pm_token_t opening = parser->previous;
18005
18006 pm_node_t *node;
18007 pm_node_t *part;
18008
18009 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18010 // If we get here, then we have an empty heredoc. We'll create
18011 // an empty content token and return an empty string node.
18012 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18013 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18014
18015 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18016 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18017 } else {
18018 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18019 }
18020
// Recompute the node length so that it ends at the end of the opening
// token (length = end of opening - node start).
18021 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
18022 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18023 // If we get here, then we tried to find something in the
18024 // heredoc but couldn't actually parse anything, so we'll just
18025 // return a missing node.
18026 //
18027 // parse_string_part handles its own errors, so there is no need
18028 // for us to add one here.
18029 node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
18030 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18031 // If we get here, then the part that we parsed was plain string
18032 // content and we're at the end of the heredoc, so we can return
18033 // just a string node with the heredoc opening and closing as
18034 // its opening and closing.
18035 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18036 pm_string_node_t *cast = (pm_string_node_t *) part;
18037
// The closing location is taken from the current (not yet consumed) token;
// the terminator itself is consumed by expect1_heredoc_term further down.
// The node's own location is set to the opening location only.
18038 cast->opening_loc = TOK2LOC(parser, &opening);
18039 cast->closing_loc = TOK2LOC(parser, &parser->current);
18040 cast->base.location = cast->opening_loc;
18041
18042 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
// Backtick heredoc: retag the existing node in place as an xstring node.
// The assert guards the assumption that both node structs have the same
// size.
18043 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18044 cast->base.type = PM_X_STRING_NODE;
18045 }
18046
// Dedent only for `~` heredocs where a non-zero common indentation was
// recorded.
18047 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18048 parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
18049 }
18050
18051 node = UP(cast);
18052 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18053 } else {
18054 // If we get here, then we have multiple parts in the heredoc,
18055 // so we'll need to create an interpolated string node to hold
18056 // them all.
18057 pm_node_list_t parts = { 0 };
18058 pm_node_list_append(parser->arena, &parts, part);
18059
// Collect parts until the terminator or EOF. Parts that come back NULL are
// simply not appended.
18060 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18061 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18062 pm_node_list_append(parser->arena, &parts, part);
18063 }
18064 }
18065
18066 // Now that we have all of the parts, create the correct type of
18067 // interpolated node.
18068 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
// The opening token is passed for both token arguments here; the real
// closing is set below once the terminator has been consumed.
18069 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18070 cast->parts = parts;
18071
18072 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18073 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
18074
// As in the single-part case, the node location is the opening only.
18075 cast->base.location = cast->opening_loc;
18076 node = UP(cast);
18077 } else {
18078 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18079
18080 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18081 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
18082
18083 cast->base.location = cast->opening_loc;
18084 node = UP(cast);
18085 }
18086
18087 // If this is a heredoc that is indented with a ~, then we need
18088 // to dedent each line by the common leading whitespace.
18089 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
// Pick the parts list out of whichever interpolated node type was built.
18090 pm_node_list_t *nodes;
18091 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18092 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18093 } else {
18094 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18095 }
18096
18097 parse_heredoc_dedent(parser, nodes, common_whitespace);
18098 }
18099 }
18100
// A string literal opening right after the heredoc: continue in
// parse_strings with the heredoc node as its starting node.
// NOTE(review): presumably this implements adjacent-literal concatenation —
// confirm in parse_strings.
18101 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18102 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18103 }
18104
18105 return node;
18106 }
// Instance variable: consume the token and build a read node from it.
18107 case PM_TOKEN_INSTANCE_VARIABLE: {
18108 parser_lex(parser);
18109 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18110
// At statement binding power with a comma next, the node is passed on to
// parse_targets_validate.
// NOTE(review): presumably this is the start of a multiple-assignment
// target list — confirm in parse_targets_validate.
18111 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18112 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18113 }
18114
18115 return node;
18116 }
// Integer literal. parser->integer_base is read before parser_lex advances
// past the literal and is then passed to the node constructor.
// NOTE(review): presumably lexing the next token may overwrite integer_base,
// hence the capture — confirm.
18117 case PM_TOKEN_INTEGER: {
18118 pm_node_flags_t base = parser->integer_base;
18119 parser_lex(parser);
18120 return UP(pm_integer_node_create(parser, base, &parser->previous));
18121 }
// Imaginary integer literal. The integer base is captured before lexing
// and passed to the imaginary-node constructor with the literal token.
18122 case PM_TOKEN_INTEGER_IMAGINARY: {
18123 pm_node_flags_t base = parser->integer_base;
18124 parser_lex(parser);
18125 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18126 }
// Rational integer literal. The integer base is captured before lexing
// and passed to the rational-node constructor with the literal token.
18127 case PM_TOKEN_INTEGER_RATIONAL: {
18128 pm_node_flags_t base = parser->integer_base;
18129 parser_lex(parser);
18130 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18131 }
// Rational imaginary integer literal. The integer base is captured before
// lexing and passed to the rational-imaginary constructor with the token.
18132 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18133 pm_node_flags_t base = parser->integer_base;
18134 parser_lex(parser);
18135 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18136 }
// __ENCODING__ keyword: consume it and wrap the consumed token in a source
// encoding node.
18137 case PM_TOKEN_KEYWORD___ENCODING__:
18138 parser_lex(parser);
18139 return UP(pm_source_encoding_node_create(parser, &parser->previous));
// __FILE__ keyword: consume it and wrap the consumed token in a source file
// node.
18140 case PM_TOKEN_KEYWORD___FILE__:
18141 parser_lex(parser);
18142 return UP(pm_source_file_node_create(parser, &parser->previous));
// __LINE__ keyword: consume it and wrap the consumed token in a source line
// node.
18143 case PM_TOKEN_KEYWORD___LINE__:
18144 parser_lex(parser);
18145 return UP(pm_source_line_node_create(parser, &parser->previous));
// `alias new_name old_name`. The type of the new name decides the result:
// a global-variable-like new name gives an alias-global-variable node, and
// everything else gives an alias-method node. Mismatched old names are
// reported as errors but a node is still returned.
18146 case PM_TOKEN_KEYWORD_ALIAS: {
// alias is only accepted at statement binding power. The error is recorded
// against the current token, before the keyword is consumed.
18147 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18148 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18149 }
18150
18151 parser_lex(parser);
18152 pm_token_t keyword = parser->previous;
18153
// The two arguments are parsed in source order; the boolean distinguishes
// the first (new name) from the second (old name).
18154 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18155 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18156
18157 switch (PM_NODE_TYPE(new_name)) {
18158 case PM_BACK_REFERENCE_READ_NODE:
18159 case PM_NUMBERED_REFERENCE_READ_NODE:
18160 case PM_GLOBAL_VARIABLE_READ_NODE: {
// The old name must also be a back reference, numbered reference or global
// variable read. Of those, a numbered reference is still rejected, with its
// own error.
18161 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18162 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18163 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18164 }
18165 } else {
18166 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18167 }
18168
18169 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18170 }
18171 case PM_SYMBOL_NODE:
18172 case PM_INTERPOLATED_SYMBOL_NODE: {
// A symbol new name requires a symbol (plain or interpolated) old name.
18173 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18174 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18175 }
18176 }
// fallthrough: the symbol arm has no break/return, so control continues
// into `default`, which builds the alias-method node.
// NOTE(review): listing line 18177 is absent from this view; presumably it
// held the project's fallthrough annotation — confirm against upstream.
18178 default:
18179 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18180 }
18181 }
// `case` expression. Parses an optional predicate, then either a run of
// `when` clauses (building a pm_case_node_t) or a run of `in` clauses
// (building a pm_case_match_node_t), then an optional `else` clause and the
// closing `end`. Block exits are pushed on entry and popped on both return
// paths.
18182 case PM_TOKEN_KEYWORD_CASE: {
18183 size_t opening_newline_index = token_newline_index(parser);
18184 parser_lex(parser);
18185
18186 pm_token_t case_keyword = parser->previous;
18187 pm_node_t *predicate = NULL;
18188
18189 pm_node_list_t current_block_exits = { 0 };
18190 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18191
// The predicate stays NULL when the keyword is followed directly by a
// newline/semicolon, by when/in/end, or by a token that cannot begin an
// expression. Otherwise it is parsed and trailing newlines/semicolons are
// skipped.
18192 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18193 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18194 predicate = NULL;
18195 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18196 predicate = NULL;
18197 } else if (!token_begins_expression_p(parser->current.type)) {
18198 predicate = NULL;
18199 } else {
18200 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18201 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18202 }
18203
// `end` right away means there are no clauses at all: record the
// missing-conditions error and return a case node early.
18204 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18205 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18206 parser_lex(parser);
18207 pop_block_exits(parser, previous_block_exits);
18208 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18209 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18210 }
18211
18212 // At this point we can create a case node, though we don't yet know
18213 // if it is a case-in or case-when node.
18214 pm_node_t *node;
18215
18216 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18217 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
// Collected across all when clauses of this case and freed after the loop.
// NOTE(review): presumably used to detect duplicated literal conditions —
// confirm in pm_when_clause_static_literals_add.
18218 pm_static_literals_t literals = { 0 };
18219
18220 // At this point we've seen a when keyword, so we know this is a
18221 // case-when node. We will continue to parse the when nodes
18222 // until we hit the end of the list.
18223 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18224 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18225 parser_lex(parser);
18226
18227 pm_token_t when_keyword = parser->previous;
18228 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18229
// Comma-separated condition list. A missing-node condition stops the list
// early via `break`.
18230 do {
18231 if (accept1(parser, PM_TOKEN_USTAR)) {
// Splat condition (`when *expr`). Unlike plain conditions, it is not added
// to the static literals.
18232 pm_token_t operator = parser->previous;
18233 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18234
18235 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18236 pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
18237
18238 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18239 } else {
18240 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18241 pm_when_node_conditions_append(parser->arena, when_node, condition);
18242
18243 // If we found a missing node, then this is a syntax
18244 // error and we should stop looping.
18245 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18246
18247 // If this is a string node, then we need to mark it
18248 // as frozen because when clause strings are frozen.
18249 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18250 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18251 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18252 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18253 }
18254
18255 pm_when_clause_static_literals_add(parser, &literals, condition);
18256 }
18257 } while (accept1(parser, PM_TOKEN_COMMA));
18258
// Delimiter: a newline/semicolon optionally followed by `then`, or else a
// mandatory `then`.
18259 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18260 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18261 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18262 }
18263 } else {
18264 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18265 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18266 }
18267
// Parse a body only if the next token does not already start the next
// clause or close the case.
18268 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18269 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18270 if (statements != NULL) {
18271 pm_when_node_statements_set(when_node, statements);
18272 }
18273 }
18274
18275 pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
18276 }
18277
18278 // If we didn't parse any conditions (in or when) then we need
18279 // to indicate that we have an error.
18280 if (case_node->conditions.size == 0) {
18281 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18282 }
18283
18284 pm_static_literals_free(&literals);
18285 node = UP(case_node);
18286 } else {
18287 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
18288
18289 // If this is a case-match node (i.e., it is a pattern matching
18290 // case statement) then we must have a predicate.
18291 if (predicate == NULL) {
18292 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18293 }
18294
18295 // At this point we expect that we're parsing a case-in node. We
18296 // will continue to parse the in nodes until we hit the end of
18297 // the list.
18298 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18299 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18300
// pattern_matching_newlines is forced on while the pattern is parsed and
// restored to its previous value right after parse_pattern returns.
18301 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18302 parser->pattern_matching_newlines = true;
18303
// The lex state and command_start are set before consuming `in`, so they
// are in effect when the token after it is lexed.
18304 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18305 parser->command_start = false;
18306 parser_lex(parser);
18307
18308 pm_token_t in_keyword = parser->previous;
18309
18310 pm_constant_id_list_t captures = { 0 };
18311 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18312
18313 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18314
18315 // Since we're in the top-level of the case-in node we need
18316 // to check for guard clauses in the form of `if` or
18317 // `unless` statements.
// Note: the `predicate` declared in each guard branch is the guard
// condition and shadows the case predicate declared at the top of this arm.
// The pattern is replaced by a modifier if/unless node wrapping it.
18318 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18319 pm_token_t keyword = parser->previous;
18320 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18321 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18322 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18323 pm_token_t keyword = parser->previous;
18324 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18325 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18326 }
18327
18328 // Now we need to check for the terminator of the in node's
18329 // pattern. It can be a newline or semicolon optionally
18330 // followed by a `then` keyword.
// then_keyword stays zero-initialized when no `then` was consumed.
// NOTE(review): NTOK2PTR below presumably maps such an unset token to NULL —
// confirm against the macro definition.
18331 pm_token_t then_keyword = { 0 };
18332 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18333 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18334 then_keyword = parser->previous;
18335 }
18336 } else {
18337 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18338 then_keyword = parser->previous;
18339 }
18340
18341 // Now we can actually parse the statements associated with
18342 // the in node.
18343 pm_statements_node_t *statements;
18344 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18345 statements = NULL;
18346 } else {
18347 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18348 }
18349
18350 // Now that we have the full pattern and statements, we can
18351 // create the node and attach it to the case node.
18352 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
18353 pm_case_match_node_condition_append(parser->arena, case_node, condition);
18354 }
18355
18356 // If we didn't parse any conditions (in or when) then we need
18357 // to indicate that we have an error.
18358 if (case_node->conditions.size == 0) {
18359 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18360 }
18361
18362 node = UP(case_node);
18363 }
18364
// Optional `else` clause, shared by both node kinds. The else node is given
// parser->current as its final token argument, i.e. the token that follows
// the else body and has not been consumed yet.
18365 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18366 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18367 pm_token_t else_keyword = parser->previous;
18368 pm_else_node_t *else_node;
18369
18370 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18371 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18372 } else {
18373 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18374 }
18375
18376 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18377 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18378 } else {
18379 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18380 }
18381 }
18382
18383 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18384 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18385
// Record the `end` keyword location on whichever node kind was built.
18386 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18387 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
18388 } else {
18389 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
18390 }
18391
18392 pop_block_exits(parser, previous_block_exits);
18393 return node;
18394 }
// `begin ... end` block. Parses the optional body statements, hands the
// begin node to parse_rescues, then requires the closing `end`.
18395 case PM_TOKEN_KEYWORD_BEGIN: {
18396 size_t opening_newline_index = token_newline_index(parser);
18397 parser_lex(parser);
18398
18399 pm_token_t begin_keyword = parser->previous;
18400 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18401
18402 pm_node_list_t current_block_exits = { 0 };
18403 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18404 pm_statements_node_t *begin_statements = NULL;
18405
// Parse a body only if the next token is not rescue/ensure/else/end. `true`
// is pushed on the accepts-block stack for the duration of the body and
// popped afterwards.
18406 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18407 pm_accepts_block_stack_push(parser, true);
18408 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18409 pm_accepts_block_stack_pop(parser);
18410 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18411 }
18412
// NOTE(review): parse_rescues presumably attaches any rescue/else/ensure
// clauses to begin_node — confirm in its definition.
18413 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18414 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18415 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18416
// Extend the node so that it ends at the end of the just-consumed token,
// then record that token as the end keyword.
18417 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
18418 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
18419 pop_block_exits(parser, previous_block_exits);
18420 return UP(begin_node);
18421 }
// `BEGIN { ... }` pre-execution block. Errors are recorded when it is not
// parsed at statement binding power, and when the current context is
// neither PM_CONTEXT_MAIN nor PM_CONTEXT_PREEXE; a node is returned either
// way.
18422 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18423 pm_node_list_t current_block_exits = { 0 };
18424 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18425
// Checked before the keyword is consumed, so the error is recorded against
// the BEGIN token itself (the current token).
18426 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18427 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18428 }
18429
18430 parser_lex(parser);
18431 pm_token_t keyword = parser->previous;
18432
18433 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18434 pm_token_t opening = parser->previous;
18435 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18436
18437 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
// The context is inspected after the body has been parsed.
18438 pm_context_t context = parser->current_context->context;
18439 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18440 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18441 }
18442
// This arm calls flush_block_exits rather than pop_block_exits.
18443 flush_block_exits(parser, previous_block_exits);
18444 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18445 }
// `break`, `next` and `return` share one arm: consume the keyword, parse
// optional arguments, then build the node matching the keyword.
18446 case PM_TOKEN_KEYWORD_BREAK:
18447 case PM_TOKEN_KEYWORD_NEXT:
18448 case PM_TOKEN_KEYWORD_RETURN: {
18449 parser_lex(parser);
18450
18451 pm_token_t keyword = parser->previous;
18452 pm_arguments_t arguments = { 0 };
18453
// Arguments are only considered when the next token can begin an
// expression or is a unary `*` / `**`.
18454 if (
18455 token_begins_expression_p(parser->current.type) ||
18456 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18457 ) {
// Note: this local shadows the enclosing `binding_power` that other arms of
// the switch read. Here it is the left binding power of the current token.
18458 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18459
// Parse arguments only when the token has no left binding power set or
// when that power is at least PM_BINDING_POWER_RANGE.
18460 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18461 pm_token_t next = parser->current;
18462 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18463
18464 // Reject `foo && return bar`.
18465 if (!accepts_command_call && arguments.arguments != NULL) {
18466 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18467 }
18468 }
18469 }
18470
// break/next nodes go through parse_block_exit unless this is a partial
// script; return nodes always go through parse_return.
18471 switch (keyword.type) {
18472 case PM_TOKEN_KEYWORD_BREAK: {
18473 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18474 if (!parser->partial_script) parse_block_exit(parser, node);
18475 return node;
18476 }
18477 case PM_TOKEN_KEYWORD_NEXT: {
18478 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18479 if (!parser->partial_script) parse_block_exit(parser, node);
18480 return node;
18481 }
18482 case PM_TOKEN_KEYWORD_RETURN: {
18483 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18484 parse_return(parser, node);
18485 return node;
18486 }
18487 default:
// Only the three keywords above can reach this switch. With asserts
// compiled out, a missing node is returned instead.
18488 assert(false && "unreachable");
18489 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
18490 }
18491 }
// `super`. Produces a forwarding-super node or a regular super node,
// depending on what the argument list contained.
18492 case PM_TOKEN_KEYWORD_SUPER: {
18493 parser_lex(parser);
18494
18495 pm_token_t keyword = parser->previous;
18496 pm_arguments_t arguments = { 0 };
18497 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18498
// A forwarding super is chosen when no opening location was recorded
// (length 0), there are no arguments, and the block is absent or is a
// PM_BLOCK_NODE.
// NOTE(review): a zero-length opening_loc presumably means no parentheses
// were written — confirm in parse_arguments_list.
18499 if (
18500 arguments.opening_loc.length == 0 &&
18501 arguments.arguments == NULL &&
18502 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18503 ) {
18504 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18505 }
18506
18507 return UP(pm_super_node_create(parser, &keyword, &arguments));
18508 }
// `yield`. Parses an argument list, rejects any block that came with it,
// and builds a yield node from the opening/closing locations and arguments.
18509 case PM_TOKEN_KEYWORD_YIELD: {
18510 parser_lex(parser);
18511
18512 pm_token_t keyword = parser->previous;
18513 pm_arguments_t arguments = { 0 };
// Same helper as the `super` arm, but with `false` as the third argument.
18514 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18515
18516 // It's possible that we've parsed a block argument through our
18517 // call to parse_arguments_list. If we found one, we should mark it
18518 // as invalid and destroy it, as we don't have a place for it on the
18519 // yield node.
18520 if (arguments.block != NULL) {
18521 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18522 pm_node_unreference(parser, arguments.block);
18523 arguments.block = NULL;
18524 }
18525
18526 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
// parse_yield is skipped when parsing an eval or a partial script.
18527 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18528
18529 return node;
18530 }
// `class`. Handles two forms:
//   - `class << expression ... end` builds a singleton class node;
//   - `class Name [< superclass] ... end` builds a class node.
// Both forms push a do-loop-stack entry and block exits on entry, push a
// new parser scope for the body, and pop them again before returning.
18531 case PM_TOKEN_KEYWORD_CLASS: {
18532 size_t opening_newline_index = token_newline_index(parser);
18533 parser_lex(parser);
18534
18535 pm_token_t class_keyword = parser->previous;
18536 pm_do_loop_stack_push(parser, false);
18537
18538 pm_node_list_t current_block_exits = { 0 };
18539 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18540
18541 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18542 pm_token_t operator = parser->previous;
18543 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18544
// The new scope is pushed only after the `<<` operand has been parsed, so
// the operand itself is parsed in the enclosing scope.
18545 pm_parser_scope_push(parser, true);
// Only checks (does not consume) that a newline or semicolon follows the
// operand; otherwise an error naming the unexpected token is recorded.
18546 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18547 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18548 }
18549
18550 pm_node_t *statements = NULL;
18551 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18552 pm_accepts_block_stack_push(parser, true);
18553 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18554 pm_accepts_block_stack_pop(parser);
18555 }
18556
// With a rescue/ensure following, the statements are replaced by the result
// of parse_rescues_implicit_begin. Otherwise only the indentation check is
// run here.
18557 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18558 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18559 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18560 } else {
18561 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18562 }
18563
18564 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18565
// The locals are ordered out of the current scope before that scope is
// popped.
18566 pm_constant_id_list_t locals;
18567 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18568
18569 pm_parser_scope_pop(parser);
18570 pm_do_loop_stack_pop(parser);
18571
// The singleton form calls flush_block_exits; the named form below calls
// pop_block_exits.
18572 flush_block_exits(parser, previous_block_exits);
18573 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18574 }
18575
// Named class. `name` is the last token consumed while parsing the constant
// path; an error is recorded if that token is not a constant.
18576 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18577 pm_token_t name = parser->previous;
18578 if (name.type != PM_TOKEN_CONSTANT) {
18579 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18580 }
18581
// inheritance_operator stays zero-initialized unless `<` is seen. Its
// `start` field is tested below to tell the two cases apart.
18582 pm_token_t inheritance_operator = { 0 };
18583 pm_node_t *superclass;
18584
18585 if (match1(parser, PM_TOKEN_LESS)) {
18586 inheritance_operator = parser->current;
// The lex state and command_start are set before consuming `<`, so they are
// in effect when the first superclass token is lexed.
18587 lex_state_set(parser, PM_LEX_STATE_BEG);
18588
18589 parser->command_start = true;
18590 parser_lex(parser);
18591
18592 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18593 } else {
18594 superclass = NULL;
18595 }
18596
18597 pm_parser_scope_push(parser, true);
18598
// With a superclass, a newline or semicolon is required (expect2). Without
// one, it is optional (accept2).
18599 if (inheritance_operator.start != NULL) {
18600 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18601 } else {
18602 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18603 }
18604 pm_node_t *statements = NULL;
18605
18606 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18607 pm_accepts_block_stack_push(parser, true);
18608 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18609 pm_accepts_block_stack_pop(parser);
18610 }
18611
18612 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18613 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18614 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18615 } else {
18616 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18617 }
18618
18619 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18620
// An error is recorded when context_def_p reports true; the check runs
// after the class body has been parsed and the class node is still built.
18621 if (context_def_p(parser)) {
18622 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18623 }
18624
18625 pm_constant_id_list_t locals;
18626 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18627
18628 pm_parser_scope_pop(parser);
18629 pm_do_loop_stack_pop(parser);
18630
// The parsed name expression must be a constant path or a constant read.
18631 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18632 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18633 }
18634
18635 pop_block_exits(parser, previous_block_exits);
18636 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18637 }
18638 case PM_TOKEN_KEYWORD_DEF: {
18639 pm_node_list_t current_block_exits = { 0 };
18640 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18641
18642 pm_token_t def_keyword = parser->current;
18643 size_t opening_newline_index = token_newline_index(parser);
18644
18645 pm_node_t *receiver = NULL;
18646 pm_token_t operator = { 0 };
18647 pm_token_t name;
18648
18649 // This context is necessary for lexing `...` in a bare params
18650 // correctly. It must be pushed before lexing the first param, so it
18651 // is here.
18652 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18653 parser_lex(parser);
18654
18655 // This will be false if the method name is not a valid identifier
18656 // but could be followed by an operator.
18657 bool valid_name = true;
18658
18659 switch (parser->current.type) {
18660 case PM_CASE_OPERATOR:
18661 pm_parser_scope_push(parser, true);
18662 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18663 parser_lex(parser);
18664
18665 name = parser->previous;
18666 break;
18667 case PM_TOKEN_IDENTIFIER: {
18668 parser_lex(parser);
18669
18670 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18671 receiver = parse_variable_call(parser);
18672
18673 pm_parser_scope_push(parser, true);
18674 lex_state_set(parser, PM_LEX_STATE_FNAME);
18675 parser_lex(parser);
18676
18677 operator = parser->previous;
18678 name = parse_method_definition_name(parser);
18679 } else {
18680 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18681 pm_parser_scope_push(parser, true);
18682
18683 name = parser->previous;
18684 }
18685
18686 break;
18687 }
18688 case PM_TOKEN_INSTANCE_VARIABLE:
18689 case PM_TOKEN_CLASS_VARIABLE:
18690 case PM_TOKEN_GLOBAL_VARIABLE:
18691 valid_name = false;
18693 case PM_TOKEN_CONSTANT:
18694 case PM_TOKEN_KEYWORD_NIL:
18695 case PM_TOKEN_KEYWORD_SELF:
18696 case PM_TOKEN_KEYWORD_TRUE:
18697 case PM_TOKEN_KEYWORD_FALSE:
18698 case PM_TOKEN_KEYWORD___FILE__:
18699 case PM_TOKEN_KEYWORD___LINE__:
18700 case PM_TOKEN_KEYWORD___ENCODING__: {
18701 pm_parser_scope_push(parser, true);
18702 parser_lex(parser);
18703
18704 pm_token_t identifier = parser->previous;
18705
18706 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18707 lex_state_set(parser, PM_LEX_STATE_FNAME);
18708 parser_lex(parser);
18709 operator = parser->previous;
18710
18711 switch (identifier.type) {
18712 case PM_TOKEN_CONSTANT:
18713 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18714 break;
18715 case PM_TOKEN_INSTANCE_VARIABLE:
18716 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18717 break;
18718 case PM_TOKEN_CLASS_VARIABLE:
18719 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18720 break;
18721 case PM_TOKEN_GLOBAL_VARIABLE:
18722 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18723 break;
18724 case PM_TOKEN_KEYWORD_NIL:
18725 receiver = UP(pm_nil_node_create(parser, &identifier));
18726 break;
18727 case PM_TOKEN_KEYWORD_SELF:
18728 receiver = UP(pm_self_node_create(parser, &identifier));
18729 break;
18730 case PM_TOKEN_KEYWORD_TRUE:
18731 receiver = UP(pm_true_node_create(parser, &identifier));
18732 break;
18733 case PM_TOKEN_KEYWORD_FALSE:
18734 receiver = UP(pm_false_node_create(parser, &identifier));
18735 break;
18736 case PM_TOKEN_KEYWORD___FILE__:
18737 receiver = UP(pm_source_file_node_create(parser, &identifier));
18738 break;
18739 case PM_TOKEN_KEYWORD___LINE__:
18740 receiver = UP(pm_source_line_node_create(parser, &identifier));
18741 break;
18742 case PM_TOKEN_KEYWORD___ENCODING__:
18743 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18744 break;
18745 default:
18746 break;
18747 }
18748
18749 name = parse_method_definition_name(parser);
18750 } else {
18751 if (!valid_name) {
18752 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18753 }
18754
18755 name = identifier;
18756 }
18757 break;
18758 }
18759 case PM_TOKEN_PARENTHESIS_LEFT: {
18760 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18761 // the inner expression of this parenthesis should not be
18762 // processed under this context. Thus, the context is popped
18763 // here.
18764 context_pop(parser);
18765 parser_lex(parser);
18766
18767 pm_token_t lparen = parser->previous;
18768 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18769
18770 accept1(parser, PM_TOKEN_NEWLINE);
18771 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18772 pm_token_t rparen = parser->previous;
18773
18774 lex_state_set(parser, PM_LEX_STATE_FNAME);
18775 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18776
18777 operator = parser->previous;
18778 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18779
18780 // `PM_CONTEXT_DEF_PARAMS` is pushed again for the same
18781 // reason as described above.
18782 pm_parser_scope_push(parser, true);
18783 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18784 name = parse_method_definition_name(parser);
18785 break;
18786 }
18787 default:
18788 pm_parser_scope_push(parser, true);
18789 name = parse_method_definition_name(parser);
18790 break;
18791 }
18792
18793 pm_token_t lparen = { 0 };
18794 pm_token_t rparen = { 0 };
18795 pm_parameters_node_t *params;
18796
18797 bool accept_endless_def = true;
18798 switch (parser->current.type) {
18799 case PM_TOKEN_PARENTHESIS_LEFT: {
18800 parser_lex(parser);
18801 lparen = parser->previous;
18802
18803 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18804 params = NULL;
18805 } else {
18806 // https://bugs.ruby-lang.org/issues/19107
18807 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18808 params = parse_parameters(
18809 parser,
18810 PM_BINDING_POWER_DEFINED,
18811 true,
18812 allow_trailing_comma,
18813 true,
18814 true,
18815 false,
18816 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18817 (uint16_t) (depth + 1)
18818 );
18819 }
18820
18821 lex_state_set(parser, PM_LEX_STATE_BEG);
18822 parser->command_start = true;
18823
18824 context_pop(parser);
18825 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18826 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18827 parser->previous.start = parser->previous.end;
18828 parser->previous.type = 0;
18829 }
18830
18831 rparen = parser->previous;
18832 break;
18833 }
18834 case PM_CASE_PARAMETER: {
18835 // If we're about to lex a label, we need to add the label
18836 // state to make sure the next newline is ignored.
18837 if (parser->current.type == PM_TOKEN_LABEL) {
18838 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18839 }
18840
18841 params = parse_parameters(
18842 parser,
18843 PM_BINDING_POWER_DEFINED,
18844 false,
18845 false,
18846 true,
18847 true,
18848 false,
18849 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18850 (uint16_t) (depth + 1)
18851 );
18852
18853 // Reject `def * = 1` and similar. We have to specifically check
18854 // for them because they create ambiguity with optional arguments.
18855 accept_endless_def = false;
18856
18857 context_pop(parser);
18858 break;
18859 }
18860 default: {
18861 params = NULL;
18862 context_pop(parser);
18863 break;
18864 }
18865 }
18866
18867 pm_node_t *statements = NULL;
18868 pm_token_t equal = { 0 };
18869 pm_token_t end_keyword = { 0 };
18870
18871 if (accept1(parser, PM_TOKEN_EQUAL)) {
18872 if (token_is_setter_name(&name)) {
18873 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18874 }
18875 if (!accept_endless_def) {
18876 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18877 }
18878 if (
18881 ) {
18882 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18883 }
18884 equal = parser->previous;
18885
18886 context_push(parser, PM_CONTEXT_DEF);
18887 pm_do_loop_stack_push(parser, false);
18888 statements = UP(pm_statements_node_create(parser));
18889
18890 bool allow_command_call;
18891 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18892 allow_command_call = accepts_command_call;
18893 } else {
18894 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18895 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18896 }
18897
18898 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18899
18900 // In an endless method definition, the body is not allowed to
18901 // be a command with a do..end block.
18902 if (PM_NODE_TYPE_P(statement, PM_CALL_NODE)) {
18903 pm_call_node_t *call = (pm_call_node_t *) statement;
18904
18905 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
18906 pm_block_node_t *block = (pm_block_node_t *) call->block;
18907
18908 if (parser->start[block->opening_loc.start] != '{') {
18909 pm_parser_err_node(parser, call->block, PM_ERR_DEF_ENDLESS_DO_BLOCK);
18910 }
18911 }
18912 }
18913
18914 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18915 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18916
18917 pm_token_t rescue_keyword = parser->previous;
18918 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18919 context_pop(parser);
18920
18921 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18922 }
18923
18924 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18925 pm_do_loop_stack_pop(parser);
18926 context_pop(parser);
18927 } else {
18928 if (lparen.start == NULL) {
18929 lex_state_set(parser, PM_LEX_STATE_BEG);
18930 parser->command_start = true;
18931 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18932 } else {
18933 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18934 }
18935
18936 pm_accepts_block_stack_push(parser, true);
18937 pm_do_loop_stack_push(parser, false);
18938
18939 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18940 pm_accepts_block_stack_push(parser, true);
18941 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18942 pm_accepts_block_stack_pop(parser);
18943 }
18944
18945 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18946 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18947 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18948 } else {
18949 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18950 }
18951
18952 pm_accepts_block_stack_pop(parser);
18953 pm_do_loop_stack_pop(parser);
18954
18955 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18956 end_keyword = parser->previous;
18957 }
18958
18959 pm_constant_id_list_t locals;
18960 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18961 pm_parser_scope_pop(parser);
18962
18968 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
18969
18970 flush_block_exits(parser, previous_block_exits);
18971
18972 return UP(pm_def_node_create(
18973 parser,
18974 name_id,
18975 &name,
18976 receiver,
18977 params,
18978 statements,
18979 &locals,
18980 &def_keyword,
18981 NTOK2PTR(operator),
18982 NTOK2PTR(lparen),
18983 NTOK2PTR(rparen),
18984 NTOK2PTR(equal),
18985 NTOK2PTR(end_keyword)
18986 ));
18987 }
18988 case PM_TOKEN_KEYWORD_DEFINED: {
18989 parser_lex(parser);
18990
18991 pm_token_t keyword = parser->previous;
18992 pm_token_t lparen = { 0 };
18993 pm_token_t rparen = { 0 };
18994 pm_node_t *expression;
18995
18996 context_push(parser, PM_CONTEXT_DEFINED);
18997 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
18998
18999 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19000 lparen = parser->previous;
19001
19002 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19003 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19004 lparen = (pm_token_t) { 0 };
19005 } else {
19006 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19007
19008 if (!parser->recovering) {
19009 accept1(parser, PM_TOKEN_NEWLINE);
19010 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19011 rparen = parser->previous;
19012 }
19013 }
19014 } else {
19015 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19016 }
19017
19018 context_pop(parser);
19019 return UP(pm_defined_node_create(
19020 parser,
19021 NTOK2PTR(lparen),
19022 expression,
19023 NTOK2PTR(rparen),
19024 &keyword
19025 ));
19026 }
19027 case PM_TOKEN_KEYWORD_END_UPCASE: {
19028 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19029 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19030 }
19031
19032 parser_lex(parser);
19033 pm_token_t keyword = parser->previous;
19034
19035 if (context_def_p(parser)) {
19036 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19037 }
19038
19039 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19040 pm_token_t opening = parser->previous;
19041 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19042
19043 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19044 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19045 }
19046 case PM_TOKEN_KEYWORD_FALSE:
19047 parser_lex(parser);
19048 return UP(pm_false_node_create(parser, &parser->previous));
19049 case PM_TOKEN_KEYWORD_FOR: {
19050 size_t opening_newline_index = token_newline_index(parser);
19051 parser_lex(parser);
19052
19053 pm_token_t for_keyword = parser->previous;
19054 pm_node_t *index;
19055
19056 context_push(parser, PM_CONTEXT_FOR_INDEX);
19057
19058 // First, parse out the first index expression.
19059 if (accept1(parser, PM_TOKEN_USTAR)) {
19060 pm_token_t star_operator = parser->previous;
19061 pm_node_t *name = NULL;
19062
19063 if (token_begins_expression_p(parser->current.type)) {
19064 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19065 }
19066
19067 index = UP(pm_splat_node_create(parser, &star_operator, name));
19068 } else if (token_begins_expression_p(parser->current.type)) {
19069 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19070 } else {
19071 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19072 index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19073 }
19074
19075 // Now, if there are multiple index expressions, parse them out.
19076 if (match1(parser, PM_TOKEN_COMMA)) {
19077 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19078 } else {
19079 index = parse_target(parser, index, false, false);
19080 }
19081
19082 context_pop(parser);
19083 pm_do_loop_stack_push(parser, true);
19084
19085 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19086 pm_token_t in_keyword = parser->previous;
19087
19088 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19089 pm_do_loop_stack_pop(parser);
19090
19091 pm_token_t do_keyword = { 0 };
19092 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19093 do_keyword = parser->previous;
19094 } else {
19095 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19096 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19097 }
19098 }
19099
19100 pm_statements_node_t *statements = NULL;
19101 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19102 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19103 }
19104
19105 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19106 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19107
19108 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19109 }
19110 case PM_TOKEN_KEYWORD_IF:
19111 if (parser_end_of_line_p(parser)) {
19112 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19113 }
19114
19115 size_t opening_newline_index = token_newline_index(parser);
19116 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19117 parser_lex(parser);
19118
19119 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19120 case PM_TOKEN_KEYWORD_UNDEF: {
19121 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19122 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19123 }
19124
19125 parser_lex(parser);
19126 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19127 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19128
19129 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19130 } else {
19131 pm_undef_node_append(parser->arena, undef, name);
19132
19133 while (match1(parser, PM_TOKEN_COMMA)) {
19134 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19135 parser_lex(parser);
19136 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19137
19138 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19139 break;
19140 }
19141
19142 pm_undef_node_append(parser->arena, undef, name);
19143 }
19144 }
19145
19146 return UP(undef);
19147 }
19148 case PM_TOKEN_KEYWORD_NOT: {
19149 parser_lex(parser);
19150
19151 pm_token_t message = parser->previous;
19152 pm_arguments_t arguments = { 0 };
19153 pm_node_t *receiver = NULL;
19154
19155 // If we do not accept a command call, then we also do not accept a
19156 // not without parentheses. In this case we need to reject this
19157 // syntax.
19158 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19159 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19160 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19161 } else {
19162 accept1(parser, PM_TOKEN_NEWLINE);
19163 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19164 }
19165
19166 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19167 }
19168
19169 accept1(parser, PM_TOKEN_NEWLINE);
19170
19171 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19172 pm_token_t lparen = parser->previous;
19173
19174 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19175 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19176 } else {
19177 arguments.opening_loc = TOK2LOC(parser, &lparen);
19178 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19179
19180 if (!parser->recovering) {
19181 accept1(parser, PM_TOKEN_NEWLINE);
19182 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19183 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19184 }
19185 }
19186 } else {
19187 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19188 }
19189
19190 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19191 }
19192 case PM_TOKEN_KEYWORD_UNLESS: {
19193 size_t opening_newline_index = token_newline_index(parser);
19194 parser_lex(parser);
19195
19196 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19197 }
19198 case PM_TOKEN_KEYWORD_MODULE: {
19199 pm_node_list_t current_block_exits = { 0 };
19200 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19201
19202 size_t opening_newline_index = token_newline_index(parser);
19203 parser_lex(parser);
19204 pm_token_t module_keyword = parser->previous;
19205
19206 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19207 pm_token_t name;
19208
19209 // If we can recover from a syntax error that occurred while parsing
19210 // the name of the module, then we'll handle that here.
19211 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19212 pop_block_exits(parser, previous_block_exits);
19213
19214 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19215 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19216 }
19217
19218 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19219 pm_token_t double_colon = parser->previous;
19220
19221 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19222 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19223 }
19224
19225 // Here we retrieve the name of the module. If it wasn't a constant,
19226 // then it's possible that `module foo` was passed, which is a
19227 // syntax error. We handle that here as well.
19228 name = parser->previous;
19229 if (name.type != PM_TOKEN_CONSTANT) {
19230 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19231 }
19232
19233 pm_parser_scope_push(parser, true);
19234 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19235 pm_node_t *statements = NULL;
19236
19237 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19238 pm_accepts_block_stack_push(parser, true);
19239 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19240 pm_accepts_block_stack_pop(parser);
19241 }
19242
19243 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19244 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19245 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19246 } else {
19247 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19248 }
19249
19250 pm_constant_id_list_t locals;
19251 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19252
19253 pm_parser_scope_pop(parser);
19254 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19255
19256 if (context_def_p(parser)) {
19257 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19258 }
19259
19260 pop_block_exits(parser, previous_block_exits);
19261
19262 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19263 }
19264 case PM_TOKEN_KEYWORD_NIL:
19265 parser_lex(parser);
19266 return UP(pm_nil_node_create(parser, &parser->previous));
19267 case PM_TOKEN_KEYWORD_REDO: {
19268 parser_lex(parser);
19269
19270 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19271 if (!parser->partial_script) parse_block_exit(parser, node);
19272
19273 return node;
19274 }
19275 case PM_TOKEN_KEYWORD_RETRY: {
19276 parser_lex(parser);
19277
19278 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19279 parse_retry(parser, node);
19280
19281 return node;
19282 }
19283 case PM_TOKEN_KEYWORD_SELF:
19284 parser_lex(parser);
19285 return UP(pm_self_node_create(parser, &parser->previous));
19286 case PM_TOKEN_KEYWORD_TRUE:
19287 parser_lex(parser);
19288 return UP(pm_true_node_create(parser, &parser->previous));
19289 case PM_TOKEN_KEYWORD_UNTIL: {
19290 size_t opening_newline_index = token_newline_index(parser);
19291
19292 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19293 pm_do_loop_stack_push(parser, true);
19294
19295 parser_lex(parser);
19296 pm_token_t keyword = parser->previous;
19297 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19298
19299 pm_do_loop_stack_pop(parser);
19300 context_pop(parser);
19301
19302 pm_token_t do_keyword = { 0 };
19303 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19304 do_keyword = parser->previous;
19305 } else {
19306 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19307 }
19308
19309 pm_statements_node_t *statements = NULL;
19310 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19311 pm_accepts_block_stack_push(parser, true);
19312 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19313 pm_accepts_block_stack_pop(parser);
19314 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19315 }
19316
19317 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19318 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19319
19320 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19321 }
19322 case PM_TOKEN_KEYWORD_WHILE: {
19323 size_t opening_newline_index = token_newline_index(parser);
19324
19325 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19326 pm_do_loop_stack_push(parser, true);
19327
19328 parser_lex(parser);
19329 pm_token_t keyword = parser->previous;
19330 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19331
19332 pm_do_loop_stack_pop(parser);
19333 context_pop(parser);
19334
19335 pm_token_t do_keyword = { 0 };
19336 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19337 do_keyword = parser->previous;
19338 } else {
19339 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19340 }
19341
19342 pm_statements_node_t *statements = NULL;
19343 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19344 pm_accepts_block_stack_push(parser, true);
19345 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19346 pm_accepts_block_stack_pop(parser);
19347 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19348 }
19349
19350 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19351 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19352
19353 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19354 }
19355 case PM_TOKEN_PERCENT_LOWER_I: {
19356 parser_lex(parser);
19357 pm_token_t opening = parser->previous;
19358 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19359 pm_node_t *current = NULL;
19360
19361 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19362 accept1(parser, PM_TOKEN_WORDS_SEP);
19363 if (match1(parser, PM_TOKEN_STRING_END)) break;
19364
19365 // Interpolation is not possible but nested heredocs can still lead to
19366 // consecutive (disjoint) string tokens when the final newline is escaped.
19367 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19368 // Record the string node, moving to interpolation if needed.
19369 if (current == NULL) {
19370 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19371 parser_lex(parser);
19372 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19373 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19374 parser_lex(parser);
19375 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
19376 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19377 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19378 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
19379 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
19380 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
19381 parser_lex(parser);
19382
19383 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19384 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
19385 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
19386
19387 // current is arena-allocated so no explicit free is needed.
19388 current = UP(interpolated);
19389 } else {
19390 assert(false && "unreachable");
19391 }
19392 }
19393
19394 if (current) {
19395 pm_array_node_elements_append(parser->arena, array, current);
19396 current = NULL;
19397 } else {
19398 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19399 }
19400 }
19401
19402 pm_token_t closing = parser->current;
19403 if (match1(parser, PM_TOKEN_EOF)) {
19404 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19405 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19406 } else {
19407 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19408 }
19409 pm_array_node_close_set(parser, array, &closing);
19410
19411 return UP(array);
19412 }
19413 case PM_TOKEN_PERCENT_UPPER_I: {
19414 parser_lex(parser);
19415 pm_token_t opening = parser->previous;
19416 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19417
19418 // This is the current node that we are parsing that will be added to the
19419 // list of elements.
19420 pm_node_t *current = NULL;
19421
19422 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19423 switch (parser->current.type) {
19424 case PM_TOKEN_WORDS_SEP: {
19425 if (current == NULL) {
19426 // If we hit a separator before we have any content, then we don't
19427 // need to do anything.
19428 } else {
19429 // If we hit a separator after we've hit content, then we need to
19430 // append that content to the list and reset the current node.
19431 pm_array_node_elements_append(parser->arena, array, current);
19432 current = NULL;
19433 }
19434
19435 parser_lex(parser);
19436 break;
19437 }
19438 case PM_TOKEN_STRING_CONTENT: {
19439 if (current == NULL) {
19440 // If we hit content and the current node is NULL, then this is
19441 // the first string content we've seen. In that case we're going
19442 // to create a new symbol node and set that to the current.
19443 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19444 parser_lex(parser);
19445 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19446 // If we hit string content and the current node is an
19447 // interpolated string, then we need to append the string content
19448 // to the list of child nodes.
19449 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19450 parser_lex(parser);
19451
19452 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
19453 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19454 // If we hit string content and the current node is a symbol node,
19455 // then we need to convert the current node into an interpolated
19456 // string and add the string content to the list of child nodes.
19457 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19458 pm_token_t content = {
19459 .type = PM_TOKEN_STRING_CONTENT,
19460 .start = parser->start + cast->value_loc.start,
19461 .end = parser->start + cast->value_loc.start + cast->value_loc.length
19462 };
19463
19464 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
19465 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
19466 parser_lex(parser);
19467
19468 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19469 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
19470 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
19471
19472 // current is arena-allocated so no explicit free is needed.
19473 current = UP(interpolated);
19474 } else {
19475 assert(false && "unreachable");
19476 }
19477
19478 break;
19479 }
19480 case PM_TOKEN_EMBVAR: {
19481 bool start_location_set = false;
19482 if (current == NULL) {
19483 // If we hit an embedded variable and the current node is NULL,
19484 // then this is the start of a new element. We'll set the current
19485 // node to a new interpolated symbol.
19486 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19487 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19488 // If we hit an embedded variable and the current node is a symbol
19489 // node, then we'll convert it to a string node, wrap it in a new
19490 // interpolated symbol, and add that string node to the list of parts.
19491 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19492
19493 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19494 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
19495 PM_NODE_START_SET_NODE(interpolated, current);
19496 start_location_set = true;
19497 current = UP(interpolated);
19498 } else {
19499 // If we hit an embedded variable and the current node is an
19500 // interpolated string, then we'll just add the embedded variable.
19501 }
19502
19503 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19504 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
19505 if (!start_location_set) {
19506 PM_NODE_START_SET_NODE(current, part);
19507 }
19508 break;
19509 }
19510 case PM_TOKEN_EMBEXPR_BEGIN: {
19511 bool start_location_set = false;
19512 if (current == NULL) {
19513 // If we hit an embedded expression and the current node is NULL,
19514 // then this is the start of a new string. We'll set the current
19515 // node to a new interpolated string.
19516 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19517 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19518 // If we hit an embedded expression and the current node is a
19519 // string node, then we'll convert the current into an
19520 // interpolated string and add the string node to the list of
19521 // parts.
19522 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19523
19524 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19525 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
19526 PM_NODE_START_SET_NODE(interpolated, current);
19527 start_location_set = true;
19528 current = UP(interpolated);
19529 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19530 // If we hit an embedded expression and the current node is an
19531 // interpolated string, then we'll just continue on.
19532 } else {
19533 assert(false && "unreachable");
19534 }
19535
19536 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19537 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
19538 if (!start_location_set) {
19539 PM_NODE_START_SET_NODE(current, part);
19540 }
19541 break;
19542 }
19543 default:
19544 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19545 parser_lex(parser);
19546 break;
19547 }
19548 }
19549
19550 // If we have a current node, then we need to append it to the list.
19551 if (current) {
19552 pm_array_node_elements_append(parser->arena, array, current);
19553 }
19554
19555 pm_token_t closing = parser->current;
19556 if (match1(parser, PM_TOKEN_EOF)) {
19557 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19558 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19559 } else {
19560 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19561 }
19562 pm_array_node_close_set(parser, array, &closing);
19563
19564 return UP(array);
19565 }
19566 case PM_TOKEN_PERCENT_LOWER_W: {
19567 parser_lex(parser);
19568 pm_token_t opening = parser->previous;
19569 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19570 pm_node_t *current = NULL;
19571
19572 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19573 accept1(parser, PM_TOKEN_WORDS_SEP);
19574 if (match1(parser, PM_TOKEN_STRING_END)) break;
19575
19576 // Interpolation is not possible but nested heredocs can still lead to
19577 // consecutive (disjoint) string tokens when the final newline is escaped.
19578 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19579 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19580
19581 // Record the string node, moving to interpolation if needed.
19582 if (current == NULL) {
19583 current = string;
19584 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19585 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, string);
19586 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19587 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19588 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19589 pm_interpolated_string_node_append(parser->arena, interpolated, string);
19590 current = UP(interpolated);
19591 } else {
19592 assert(false && "unreachable");
19593 }
19594 parser_lex(parser);
19595 }
19596
19597 if (current) {
19598 pm_array_node_elements_append(parser->arena, array, current);
19599 current = NULL;
19600 } else {
19601 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19602 }
19603 }
19604
19605 pm_token_t closing = parser->current;
19606 if (match1(parser, PM_TOKEN_EOF)) {
19607 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19608 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19609 } else {
19610 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19611 }
19612
19613 pm_array_node_close_set(parser, array, &closing);
19614 return UP(array);
19615 }
19616 case PM_TOKEN_PERCENT_UPPER_W: {
19617 parser_lex(parser);
19618 pm_token_t opening = parser->previous;
19619 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19620
19621 // This is the current node that we are parsing that will be added
19622 // to the list of elements.
19623 pm_node_t *current = NULL;
19624
19625 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19626 switch (parser->current.type) {
19627 case PM_TOKEN_WORDS_SEP: {
19628 // Reset the explicit encoding if we hit a separator
19629 // since each element can have its own encoding.
19630 parser->explicit_encoding = NULL;
19631
19632 if (current == NULL) {
19633 // If we hit a separator before we have any content,
19634 // then we don't need to do anything.
19635 } else {
19636 // If we hit a separator after we've hit content,
19637 // then we need to append that content to the list
19638 // and reset the current node.
19639 pm_array_node_elements_append(parser->arena, array, current);
19640 current = NULL;
19641 }
19642
19643 parser_lex(parser);
19644 break;
19645 }
19646 case PM_TOKEN_STRING_CONTENT: {
19647 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19648 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19649 parser_lex(parser);
19650
19651 if (current == NULL) {
19652 // If we hit content and the current node is NULL,
19653 // then this is the first string content we've seen.
19654 // In that case we're going to create a new string
19655 // node and set that to the current.
19656 current = string;
19657 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19658 // If we hit string content and the current node is
19659 // an interpolated string, then we need to append
19660 // the string content to the list of child nodes.
19661 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, string);
19662 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19663 // If we hit string content and the current node is
19664 // a string node, then we need to convert the
19665 // current node into an interpolated string and add
19666 // the string content to the list of child nodes.
19667 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19668 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19669 pm_interpolated_string_node_append(parser->arena, interpolated, string);
19670 current = UP(interpolated);
19671 } else {
19672 assert(false && "unreachable");
19673 }
19674
19675 break;
19676 }
19677 case PM_TOKEN_EMBVAR: {
19678 if (current == NULL) {
19679 // If we hit an embedded variable and the current
19680 // node is NULL, then this is the start of a new
19681 // string. We'll set the current node to a new
19682 // interpolated string.
19683 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19684 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19685 // If we hit an embedded variable and the current
19686 // node is a string node, then we'll convert the
19687 // current into an interpolated string and add the
19688 // string node to the list of parts.
19689 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19690 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19691 current = UP(interpolated);
19692 } else {
19693 // If we hit an embedded variable and the current
19694 // node is an interpolated string, then we'll just
19695 // add the embedded variable.
19696 }
19697
19698 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19699 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, part);
19700 break;
19701 }
19702 case PM_TOKEN_EMBEXPR_BEGIN: {
19703 if (current == NULL) {
19704 // If we hit an embedded expression and the current
19705 // node is NULL, then this is the start of a new
19706 // string. We'll set the current node to a new
19707 // interpolated string.
19708 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19709 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19710 // If we hit an embedded expression and the current
19711 // node is a string node, then we'll convert the
19712 // current into an interpolated string and add the
19713 // string node to the list of parts.
19714 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19715 pm_interpolated_string_node_append(parser->arena, interpolated, current);
19716 current = UP(interpolated);
19717 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19718 // If we hit an embedded expression and the current
19719 // node is an interpolated string, then we'll just
19720 // continue on.
19721 } else {
19722 assert(false && "unreachable");
19723 }
19724
19725 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19726 pm_interpolated_string_node_append(parser->arena, (pm_interpolated_string_node_t *) current, part);
19727 break;
19728 }
19729 default:
19730 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19731 parser_lex(parser);
19732 break;
19733 }
19734 }
19735
19736 // If we have a current node, then we need to append it to the list.
19737 if (current) {
19738 pm_array_node_elements_append(parser->arena, array, current);
19739 }
19740
19741 pm_token_t closing = parser->current;
19742 if (match1(parser, PM_TOKEN_EOF)) {
19743 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19744 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19745 } else {
19746 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19747 }
19748
19749 pm_array_node_close_set(parser, array, &closing);
19750 return UP(array);
19751 }
19752 case PM_TOKEN_REGEXP_BEGIN: {
19753 pm_token_t opening = parser->current;
19754 parser_lex(parser);
19755
19756 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19757 // If we get here, then we have an end immediately after a start. In
19758 // that case we'll create an empty content token and return an
19759 // uninterpolated regular expression.
19760 pm_token_t content = (pm_token_t) {
19761 .type = PM_TOKEN_STRING_CONTENT,
19762 .start = parser->previous.end,
19763 .end = parser->previous.end
19764 };
19765
19766 parser_lex(parser);
19767
19768 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19769 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19770
19771 return node;
19772 }
19773
19775
19776 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19777 // In this case we've hit string content so we know the regular
19778 // expression at least has something in it. We'll need to check if the
19779 // following token is the end (in which case we can return a plain
19780 // regular expression) or if it's not then it has interpolation.
19781 pm_string_t unescaped = parser->current_string;
19782 pm_token_t content = parser->current;
19783 bool ascii_only = parser->current_regular_expression_ascii_only;
19784 parser_lex(parser);
19785
19786 // If we hit an end, then we can create a regular expression
19787 // node without interpolation, which can be represented more
19788 // succinctly and more easily compiled.
19789 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19790 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19791
19792 // If we're not immediately followed by a =~, then we want
19793 // to parse all of the errors at this point. If it is
19794 // followed by a =~, then it will get parsed higher up while
19795 // parsing the named captures as well.
19796 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19797 parse_regular_expression_errors(parser, node);
19798 }
19799
19800 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19801 return UP(node);
19802 }
19803
19804 // If we get here, then we have interpolation so we'll need to create
19805 // a regular expression node with interpolation.
19806 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19807
19808 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19809 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19810 // This is extremely strange, but the first string part of a
19811 // regular expression will always be tagged as binary if we
19812 // are in a US-ASCII file, no matter its contents.
19813 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19814 }
19815
19816 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
19817 } else {
19818 // If the first part of the body of the regular expression is not a
19819 // string content, then we have interpolation and we need to create an
19820 // interpolated regular expression node.
19821 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19822 }
19823
19824 // Now that we're here and we have interpolation, we'll parse all of the
19825 // parts into the list.
19826 pm_node_t *part;
19827 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19828 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19829 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
19830 }
19831 }
19832
19833 pm_token_t closing = parser->current;
19834 if (match1(parser, PM_TOKEN_EOF)) {
19835 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19836 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19837 } else {
19838 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19839 }
19840
19841 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19842 return UP(interpolated);
19843 }
19844 case PM_TOKEN_BACKTICK:
19845 case PM_TOKEN_PERCENT_LOWER_X: {
19846 parser_lex(parser);
19847 pm_token_t opening = parser->previous;
19848
19849 // When we get here, we don't know if this string is going to have
19850 // interpolation or not, even though it is allowed. Still, we want to be
19851 // able to return a string node without interpolation if we can since
19852 // it'll be faster.
19853 if (match1(parser, PM_TOKEN_STRING_END)) {
19854 // If we get here, then we have an end immediately after a start. In
19855 // that case we'll create an empty content token and return an
19856 // uninterpolated string.
19857 pm_token_t content = (pm_token_t) {
19858 .type = PM_TOKEN_STRING_CONTENT,
19859 .start = parser->previous.end,
19860 .end = parser->previous.end
19861 };
19862
19863 parser_lex(parser);
19864 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19865 }
19866
19868
19869 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19870 // In this case we've hit string content so we know the string
19871 // at least has something in it. We'll need to check if the
19872 // following token is the end (in which case we can return a
19873 // plain string) or if it's not then it has interpolation.
19874 pm_string_t unescaped = parser->current_string;
19875 pm_token_t content = parser->current;
19876 parser_lex(parser);
19877
19878 if (match1(parser, PM_TOKEN_STRING_END)) {
19879 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19880 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19881 parser_lex(parser);
19882 return node;
19883 }
19884
19885 // If we get here, then we have interpolation so we'll need to
19886 // create a string node with interpolation.
19887 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19888
19889 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19890 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19891
19892 pm_interpolated_xstring_node_append(parser->arena, node, part);
19893 } else {
19894 // If the first part of the body of the string is not a string
19895 // content, then we have interpolation and we need to create an
19896 // interpolated string node.
19897 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19898 }
19899
19900 pm_node_t *part;
19901 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19902 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19903 pm_interpolated_xstring_node_append(parser->arena, node, part);
19904 }
19905 }
19906
19907 pm_token_t closing = parser->current;
19908 if (match1(parser, PM_TOKEN_EOF)) {
19909 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19910 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19911 } else {
19912 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19913 }
19914 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
19915
19916 return UP(node);
19917 }
19918 case PM_TOKEN_USTAR: {
19919 parser_lex(parser);
19920
19921 // * operators at the beginning of expressions are only valid in the
19922 // context of a multiple assignment. We enforce that here. We'll
19923 // still lex past it though and create a missing node place.
19924 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19925 pm_parser_err_prefix(parser, diag_id);
19926 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19927 }
19928
19929 pm_token_t operator = parser->previous;
19930 pm_node_t *name = NULL;
19931
19932 if (token_begins_expression_p(parser->current.type)) {
19933 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19934 }
19935
19936 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
19937
19938 if (match1(parser, PM_TOKEN_COMMA)) {
19939 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19940 } else {
19941 return parse_target_validate(parser, splat, true);
19942 }
19943 }
19944 case PM_TOKEN_BANG: {
19945 if (binding_power > PM_BINDING_POWER_UNARY) {
19946 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19947 }
19948
19949 parser_lex(parser);
19950
19951 pm_token_t operator = parser->previous;
19952 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19953 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
19954
19955 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
19956 return UP(node);
19957 }
19958 case PM_TOKEN_TILDE: {
19959 if (binding_power > PM_BINDING_POWER_UNARY) {
19960 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19961 }
19962 parser_lex(parser);
19963
19964 pm_token_t operator = parser->previous;
19965 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19966 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
19967
19968 return UP(node);
19969 }
19970 case PM_TOKEN_UMINUS: {
19971 if (binding_power > PM_BINDING_POWER_UNARY) {
19972 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19973 }
19974 parser_lex(parser);
19975
19976 pm_token_t operator = parser->previous;
19977 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19978 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
19979
19980 return UP(node);
19981 }
19982 case PM_TOKEN_UMINUS_NUM: {
19983 parser_lex(parser);
19984
19985 pm_token_t operator = parser->previous;
19986 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19987
19988 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
19989 pm_token_t exponent_operator = parser->previous;
19990 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
19991 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
19992 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
19993 } else {
19994 switch (PM_NODE_TYPE(node)) {
19995 case PM_INTEGER_NODE:
19996 case PM_FLOAT_NODE:
19997 case PM_RATIONAL_NODE:
19998 case PM_IMAGINARY_NODE:
19999 parse_negative_numeric(node);
20000 break;
20001 default:
20002 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20003 break;
20004 }
20005 }
20006
20007 return node;
20008 }
20009 case PM_TOKEN_MINUS_GREATER: {
20010 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20012
20013 size_t opening_newline_index = token_newline_index(parser);
20014 pm_accepts_block_stack_push(parser, true);
20015 parser_lex(parser);
20016
20017 pm_token_t operator = parser->previous;
20018 pm_parser_scope_push(parser, false);
20019
20020 pm_block_parameters_node_t *block_parameters;
20021
20022 switch (parser->current.type) {
20023 case PM_TOKEN_PARENTHESIS_LEFT: {
20024 pm_token_t opening = parser->current;
20025 parser_lex(parser);
20026
20027 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20028 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20029 } else {
20030 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20031 }
20032
20033 accept1(parser, PM_TOKEN_NEWLINE);
20034 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20035
20036 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20037 break;
20038 }
20039 case PM_CASE_PARAMETER: {
20040 pm_accepts_block_stack_push(parser, false);
20041 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20042 pm_accepts_block_stack_pop(parser);
20043 break;
20044 }
20045 default: {
20046 block_parameters = NULL;
20047 break;
20048 }
20049 }
20050
20051 pm_token_t opening;
20052 pm_node_t *body = NULL;
20053 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20054
20055 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20056 opening = parser->previous;
20057
20058 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20059 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20060 }
20061
20062 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20063 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20064 } else {
20065 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20066 opening = parser->previous;
20067
20068 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20069 pm_accepts_block_stack_push(parser, true);
20070 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20071 pm_accepts_block_stack_pop(parser);
20072 }
20073
20074 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20075 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20076 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20077 } else {
20078 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20079 }
20080
20081 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20082 }
20083
20084 pm_constant_id_list_t locals;
20085 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20086 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20087
20088 pm_parser_scope_pop(parser);
20089 pm_accepts_block_stack_pop(parser);
20090
20091 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20092 }
20093 case PM_TOKEN_UPLUS: {
20094 if (binding_power > PM_BINDING_POWER_UNARY) {
20095 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20096 }
20097 parser_lex(parser);
20098
20099 pm_token_t operator = parser->previous;
20100 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20101 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20102
20103 return UP(node);
20104 }
20105 case PM_TOKEN_STRING_BEGIN:
20106 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20107 case PM_TOKEN_SYMBOL_BEGIN: {
20108 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20109 parser_lex(parser);
20110
20111 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20112 }
20113 default: {
20114 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20115
20116 if (recoverable != PM_CONTEXT_NONE) {
20117 parser->recovering = true;
20118
20119 // If the given error is not the generic one, then we'll add it
20120 // here because it will provide more context in addition to the
20121 // recoverable error that we will also add.
20122 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20123 pm_parser_err_prefix(parser, diag_id);
20124 }
20125
20126 // If we get here, then we are assuming this token is closing a
20127 // parent context, so we'll indicate that to the user so that
20128 // they know how we behaved.
20129 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20130 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20131 // We're going to make a special case here, because "cannot
20132 // parse expression" is pretty generic, and we know here that we
20133 // have an unexpected token.
20134 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20135 } else {
20136 pm_parser_err_prefix(parser, diag_id);
20137 }
20138
20139 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20140 }
20141 }
20142}
20143
20153static pm_node_t *
20154parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20155 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20156
20157 // Contradicting binding powers, the right-hand-side value of the assignment
20158 // allows the `rescue` modifier.
20159 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20160 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20161
20162 pm_token_t rescue = parser->current;
20163 parser_lex(parser);
20164
20165 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20166 context_pop(parser);
20167
20168 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20169 }
20170
20171 return value;
20172}
20173
20178static void
20179parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20180 switch (PM_NODE_TYPE(node)) {
20181 case PM_BEGIN_NODE: {
20182 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20183 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20184 break;
20185 }
20186 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20188 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20189 break;
20190 }
20191 case PM_PARENTHESES_NODE: {
20192 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20193 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20194 break;
20195 }
20196 case PM_STATEMENTS_NODE: {
20197 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20198 const pm_node_t *statement;
20199
20200 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20201 parse_assignment_value_local(parser, statement);
20202 }
20203 break;
20204 }
20205 default:
20206 break;
20207 }
20208}
20209
20222static pm_node_t *
20223parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20224 bool permitted = true;
20225 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20226
20227 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20228 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20229
20230 parse_assignment_value_local(parser, value);
20231 bool single_value = true;
20232
20233 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20234 single_value = false;
20235
20236 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20237 pm_array_node_elements_append(parser->arena, array, value);
20238 value = UP(array);
20239
20240 while (accept1(parser, PM_TOKEN_COMMA)) {
20241 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20242
20243 pm_array_node_elements_append(parser->arena, array, element);
20244 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20245
20246 parse_assignment_value_local(parser, element);
20247 }
20248 }
20249
20250 // Contradicting binding powers, the right-hand-side value of the assignment
20251 // allows the `rescue` modifier.
20252 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20253 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20254
20255 pm_token_t rescue = parser->current;
20256 parser_lex(parser);
20257
20258 bool accepts_command_call_inner = false;
20259
20260 // RHS can accept command call iff the value is a call with arguments
20261 // but without parenthesis.
20262 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20263 pm_call_node_t *call_node = (pm_call_node_t *) value;
20264 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20265 accepts_command_call_inner = true;
20266 }
20267 }
20268
20269 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20270 context_pop(parser);
20271
20272 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20273 }
20274
20275 return value;
20276}
20277
20285static void
20286parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20287 if (call_node->arguments != NULL) {
20288 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20289 pm_node_unreference(parser, UP(call_node->arguments));
20290 call_node->arguments = NULL;
20291 }
20292
20293 if (call_node->block != NULL) {
20294 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20295 pm_node_unreference(parser, UP(call_node->block));
20296 call_node->block = NULL;
20297 }
20298}
20299
20324
20325static inline const uint8_t *
20326pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20327 cursor++;
20328
20329 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20330 uint8_t value = escape_hexadecimal_digit(*cursor);
20331 cursor++;
20332
20333 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20334 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20335 cursor++;
20336 }
20337
20338 pm_buffer_append_byte(unescaped, value);
20339 } else {
20340 pm_buffer_append_string(unescaped, "\\x", 2);
20341 }
20342
20343 return cursor;
20344}
20345
20346static inline const uint8_t *
20347pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20348 uint8_t value = (uint8_t) (*cursor - '0');
20349 cursor++;
20350
20351 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20352 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20353 cursor++;
20354
20355 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20356 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20357 cursor++;
20358 }
20359 }
20360
20361 pm_buffer_append_byte(unescaped, value);
20362 return cursor;
20363}
20364
20365static inline const uint8_t *
20366pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20367 const uint8_t *start = cursor - 1;
20368 cursor++;
20369
20370 if (cursor >= end) {
20371 pm_buffer_append_string(unescaped, "\\u", 2);
20372 return cursor;
20373 }
20374
20375 if (*cursor != '{') {
20376 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20377 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20378
20379 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20380 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20381 }
20382
20383 return cursor + length;
20384 }
20385
20386 cursor++;
20387 for (;;) {
20388 while (cursor < end && *cursor == ' ') cursor++;
20389
20390 if (cursor >= end) break;
20391 if (*cursor == '}') {
20392 cursor++;
20393 break;
20394 }
20395
20396 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20397 if (length == 0) {
20398 break;
20399 }
20400 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20401
20402 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20403 cursor += length;
20404 }
20405
20406 return cursor;
20407}
20408
20409static void
20410pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20411 const uint8_t *end = source + length;
20412 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20413
20414 for (;;) {
20415 if (++cursor >= end) {
20416 pm_buffer_append_byte(unescaped, '\\');
20417 return;
20418 }
20419
20420 switch (*cursor) {
20421 case 'x':
20422 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20423 break;
20424 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20425 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20426 break;
20427 case 'u':
20428 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20429 break;
20430 default:
20431 pm_buffer_append_byte(unescaped, '\\');
20432 break;
20433 }
20434
20435 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20436 if (next_cursor == NULL) break;
20437
20438 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20439 cursor = next_cursor;
20440 }
20441
20442 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20443}
20444
20449static void
20450parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20452
20453 pm_parser_t *parser = callback_data->parser;
20454 pm_call_node_t *call = callback_data->call;
20455 pm_constant_id_list_t *names = &callback_data->names;
20456
20457 const uint8_t *source = pm_string_source(capture);
20458 size_t length = pm_string_length(capture);
20459 pm_buffer_t unescaped = { 0 };
20460
20461 // First, we need to handle escapes within the name of the capture group.
20462 // This is because regular expressions have three different representations
20463 // in prism. The first is the plain source code. The second is the
20464 // representation that will be sent to the regular expression engine, which
20465 // is the value of the "unescaped" field. This is poorly named, because it
20466 // actually still contains escapes, just a subset of them that the regular
20467 // expression engine knows how to handle. The third representation is fully
20468 // unescaped, which is what we need.
20469 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20470 if (PRISM_UNLIKELY(cursor != NULL)) {
20471 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
20472 source = (const uint8_t *) pm_buffer_value(&unescaped);
20473 length = pm_buffer_length(&unescaped);
20474 }
20475
20476 const uint8_t *start;
20477 const uint8_t *end;
20478 pm_constant_id_t name;
20479
20480 // If the name of the capture group isn't a valid identifier, we do
20481 // not add it to the local table.
20482 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20483 pm_buffer_free(&unescaped);
20484 return;
20485 }
20486
20487 if (callback_data->shared) {
20488 // If the unescaped string is a slice of the source, then we can
20489 // copy the names directly. The pointers will line up.
20490 start = source;
20491 end = source + length;
20492 name = pm_parser_constant_id_raw(parser, start, end);
20493 } else {
20494 // Otherwise, the name is a slice of the malloc-ed owned string,
20495 // in which case we need to copy it out into a new string.
20496 start = parser->start + PM_NODE_START(call->receiver);
20497 end = parser->start + PM_NODE_END(call->receiver);
20498
20499 void *memory = xmalloc(length);
20500 if (memory == NULL) abort();
20501
20502 memcpy(memory, source, length);
20503 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20504 }
20505
20506 // Add this name to the list of constants if it is valid, not duplicated,
20507 // and not a keyword.
20508 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20509 pm_constant_id_list_append(parser->arena, names, name);
20510
20511 int depth;
20512 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20513 // If the local is not already a local but it is a keyword, then we
20514 // do not want to add a capture for this.
20515 if (pm_local_is_keyword((const char *) source, length)) {
20516 pm_buffer_free(&unescaped);
20517 return;
20518 }
20519
20520 // If the identifier is not already a local, then we will add it to
20521 // the local table.
20522 pm_parser_local_add(parser, name, start, end, 0);
20523 }
20524
20525 // Here we lazily create the MatchWriteNode since we know we're
20526 // about to add a target.
20527 if (callback_data->match == NULL) {
20528 callback_data->match = pm_match_write_node_create(parser, call);
20529 }
20530
20531 // Next, create the local variable target and add it to the list of
20532 // targets for the match.
20533 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20534 pm_node_list_append(parser->arena, &callback_data->match->targets, target);
20535 }
20536
20537 pm_buffer_free(&unescaped);
20538}
20539
20544static pm_node_t *
20545parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20547 .parser = parser,
20548 .call = call,
20549 .names = { 0 },
20550 .shared = content->type == PM_STRING_SHARED
20551 };
20552
20554 .parser = parser,
20555 .start = parser->start + PM_NODE_START(call->receiver),
20556 .end = parser->start + PM_NODE_END(call->receiver),
20557 .shared = content->type == PM_STRING_SHARED
20558 };
20559
20560 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20561
20562 if (callback_data.match != NULL) {
20563 return UP(callback_data.match);
20564 } else {
20565 return UP(call);
20566 }
20567}
20568
20569static inline pm_node_t *
20570parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20571 pm_token_t token = parser->current;
20572
20573 switch (token.type) {
20574 case PM_TOKEN_EQUAL: {
20575 switch (PM_NODE_TYPE(node)) {
20576 case PM_CALL_NODE: {
20577 // If we have no arguments to the call node and we need this
20578 // to be a target then this is either a method call or a
20579 // local variable write. This _must_ happen before the value
20580 // is parsed because it could be referenced in the value.
20581 pm_call_node_t *call_node = (pm_call_node_t *) node;
20582 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20583 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
20584 }
20585 }
20587 case PM_CASE_WRITABLE: {
20588 // When we have `it = value`, we need to add `it` as a local
20589 // variable before parsing the value, in case the value
20590 // references the variable.
20591 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20592 pm_parser_local_add_location(parser, &node->location, 0);
20593 }
20594
20595 parser_lex(parser);
20596 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20597
20598 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20599 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20600 }
20601
20602 return parse_write(parser, node, &token, value);
20603 }
20604 case PM_SPLAT_NODE: {
20605 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20606 pm_multi_target_node_targets_append(parser, multi_target, node);
20607
20608 parser_lex(parser);
20609 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20610 return parse_write(parser, UP(multi_target), &token, value);
20611 }
20612 case PM_SOURCE_ENCODING_NODE:
20613 case PM_FALSE_NODE:
20614 case PM_SOURCE_FILE_NODE:
20615 case PM_SOURCE_LINE_NODE:
20616 case PM_NIL_NODE:
20617 case PM_SELF_NODE:
20618 case PM_TRUE_NODE: {
20619 // In these special cases, we have specific error messages
20620 // and we will replace them with local variable writes.
20621 parser_lex(parser);
20622 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20623 return parse_unwriteable_write(parser, node, &token, value);
20624 }
20625 default:
20626 // In this case we have an = sign, but we don't know what
20627 // it's for. We need to treat it as an error. We'll mark it
20628 // as an error and skip past it.
20629 parser_lex(parser);
20630 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20631 return node;
20632 }
20633 }
20634 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20635 switch (PM_NODE_TYPE(node)) {
20636 case PM_BACK_REFERENCE_READ_NODE:
20637 case PM_NUMBERED_REFERENCE_READ_NODE:
20638 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20640 case PM_GLOBAL_VARIABLE_READ_NODE: {
20641 parser_lex(parser);
20642
20643 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20644 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20645
20646 return result;
20647 }
20648 case PM_CLASS_VARIABLE_READ_NODE: {
20649 parser_lex(parser);
20650
20651 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20652 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20653
20654 return result;
20655 }
20656 case PM_CONSTANT_PATH_NODE: {
20657 parser_lex(parser);
20658
20659 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20660 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20661
20662 return parse_shareable_constant_write(parser, write);
20663 }
20664 case PM_CONSTANT_READ_NODE: {
20665 parser_lex(parser);
20666
20667 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20668 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20669
20670 return parse_shareable_constant_write(parser, write);
20671 }
20672 case PM_INSTANCE_VARIABLE_READ_NODE: {
20673 parser_lex(parser);
20674
20675 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20676 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20677
20678 return result;
20679 }
20680 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20681 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20682 parser_lex(parser);
20683
20684 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20685 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20686
20687 pm_node_unreference(parser, node);
20688 return result;
20689 }
20690 case PM_LOCAL_VARIABLE_READ_NODE: {
20691 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20692 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
20693 pm_node_unreference(parser, node);
20694 }
20695
20697 parser_lex(parser);
20698
20699 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20700 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20701
20702 return result;
20703 }
20704 case PM_CALL_NODE: {
20705 pm_call_node_t *cast = (pm_call_node_t *) node;
20706
20707 // If we have a vcall (a method with no arguments and no
20708 // receiver that could have been a local variable) then we
20709 // will transform it into a local variable write.
20710 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20711 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20712 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20713 parser_lex(parser);
20714
20715 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20716 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20717
20718 return result;
20719 }
20720
20721 // Move past the token here so that we have already added
20722 // the local variable by this point.
20723 parser_lex(parser);
20724
20725 // If there is no call operator and the message is "[]" then
20726 // this is an aref expression, and we can transform it into
20727 // an aset expression.
20728 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20729 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20730 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20731 }
20732
20733 // If this node cannot be writable, then we have an error.
20734 if (pm_call_node_writable_p(parser, cast)) {
20735 parse_write_name(parser, &cast->name);
20736 } else {
20737 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20738 }
20739
20740 parse_call_operator_write(parser, cast, &token);
20741 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20742 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20743 }
20744 case PM_MULTI_WRITE_NODE: {
20745 parser_lex(parser);
20746 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20747 return node;
20748 }
20749 default:
20750 parser_lex(parser);
20751
20752 // In this case we have an &&= sign, but we don't know what it's for.
20753 // We need to treat it as an error. For now, we'll mark it as an error
20754 // and just skip right past it.
20755 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20756 return node;
20757 }
20758 }
20759 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20760 switch (PM_NODE_TYPE(node)) {
20761 case PM_BACK_REFERENCE_READ_NODE:
20762 case PM_NUMBERED_REFERENCE_READ_NODE:
20763 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20765 case PM_GLOBAL_VARIABLE_READ_NODE: {
20766 parser_lex(parser);
20767
20768 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20769 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20770
20771 return result;
20772 }
20773 case PM_CLASS_VARIABLE_READ_NODE: {
20774 parser_lex(parser);
20775
20776 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20777 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20778
20779 return result;
20780 }
20781 case PM_CONSTANT_PATH_NODE: {
20782 parser_lex(parser);
20783
20784 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20785 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20786
20787 return parse_shareable_constant_write(parser, write);
20788 }
20789 case PM_CONSTANT_READ_NODE: {
20790 parser_lex(parser);
20791
20792 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20793 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20794
20795 return parse_shareable_constant_write(parser, write);
20796 }
20797 case PM_INSTANCE_VARIABLE_READ_NODE: {
20798 parser_lex(parser);
20799
20800 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20801 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20802
20803 return result;
20804 }
20805 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20806 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20807 parser_lex(parser);
20808
20809 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20810 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20811
20812 pm_node_unreference(parser, node);
20813 return result;
20814 }
20815 case PM_LOCAL_VARIABLE_READ_NODE: {
20816 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20817 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
20818 pm_node_unreference(parser, node);
20819 }
20820
20822 parser_lex(parser);
20823
20824 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20825 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20826
20827 return result;
20828 }
20829 case PM_CALL_NODE: {
20830 pm_call_node_t *cast = (pm_call_node_t *) node;
20831
20832 // If we have a vcall (a method with no arguments and no
20833 // receiver that could have been a local variable) then we
20834 // will transform it into a local variable write.
20835 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20836 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20837 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20838 parser_lex(parser);
20839
20840 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20841 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20842
20843 return result;
20844 }
20845
20846 // Move past the token here so that we have already added
20847 // the local variable by this point.
20848 parser_lex(parser);
20849
20850 // If there is no call operator and the message is "[]" then
20851 // this is an aref expression, and we can transform it into
20852 // an aset expression.
20853 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20854 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20855 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20856 }
20857
20858 // If this node cannot be writable, then we have an error.
20859 if (pm_call_node_writable_p(parser, cast)) {
20860 parse_write_name(parser, &cast->name);
20861 } else {
20862 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20863 }
20864
20865 parse_call_operator_write(parser, cast, &token);
20866 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20867 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20868 }
20869 case PM_MULTI_WRITE_NODE: {
20870 parser_lex(parser);
20871 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20872 return node;
20873 }
20874 default:
20875 parser_lex(parser);
20876
20877 // In this case we have an ||= sign, but we don't know what it's for.
20878 // We need to treat it as an error. For now, we'll mark it as an error
20879 // and just skip right past it.
20880 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
20881 return node;
20882 }
20883 }
20884 case PM_TOKEN_AMPERSAND_EQUAL:
20885 case PM_TOKEN_CARET_EQUAL:
20886 case PM_TOKEN_GREATER_GREATER_EQUAL:
20887 case PM_TOKEN_LESS_LESS_EQUAL:
20888 case PM_TOKEN_MINUS_EQUAL:
20889 case PM_TOKEN_PERCENT_EQUAL:
20890 case PM_TOKEN_PIPE_EQUAL:
20891 case PM_TOKEN_PLUS_EQUAL:
20892 case PM_TOKEN_SLASH_EQUAL:
20893 case PM_TOKEN_STAR_EQUAL:
20894 case PM_TOKEN_STAR_STAR_EQUAL: {
20895 switch (PM_NODE_TYPE(node)) {
20896 case PM_BACK_REFERENCE_READ_NODE:
20897 case PM_NUMBERED_REFERENCE_READ_NODE:
20898 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20900 case PM_GLOBAL_VARIABLE_READ_NODE: {
20901 parser_lex(parser);
20902
20903 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20904 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20905
20906 return result;
20907 }
20908 case PM_CLASS_VARIABLE_READ_NODE: {
20909 parser_lex(parser);
20910
20911 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20912 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20913
20914 return result;
20915 }
20916 case PM_CONSTANT_PATH_NODE: {
20917 parser_lex(parser);
20918
20919 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20920 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20921
20922 return parse_shareable_constant_write(parser, write);
20923 }
20924 case PM_CONSTANT_READ_NODE: {
20925 parser_lex(parser);
20926
20927 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20928 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20929
20930 return parse_shareable_constant_write(parser, write);
20931 }
20932 case PM_INSTANCE_VARIABLE_READ_NODE: {
20933 parser_lex(parser);
20934
20935 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20936 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20937
20938 return result;
20939 }
20940 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20941 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20942 parser_lex(parser);
20943
20944 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20945 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
20946
20947 pm_node_unreference(parser, node);
20948 return result;
20949 }
20950 case PM_LOCAL_VARIABLE_READ_NODE: {
20951 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20952 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
20953 pm_node_unreference(parser, node);
20954 }
20955
20957 parser_lex(parser);
20958
20959 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20960 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20961
20962 return result;
20963 }
20964 case PM_CALL_NODE: {
20965 parser_lex(parser);
20966 pm_call_node_t *cast = (pm_call_node_t *) node;
20967
20968 // If we have a vcall (a method with no arguments and no
20969 // receiver that could have been a local variable) then we
20970 // will transform it into a local variable write.
20971 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20972 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20973 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20974 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20975 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20976
20977 return result;
20978 }
20979
20980 // If there is no call operator and the message is "[]" then
20981 // this is an aref expression, and we can transform it into
20982 // an aset expression.
20983 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20984 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20985 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
20986 }
20987
20988 // If this node cannot be writable, then we have an error.
20989 if (pm_call_node_writable_p(parser, cast)) {
20990 parse_write_name(parser, &cast->name);
20991 } else {
20992 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20993 }
20994
20995 parse_call_operator_write(parser, cast, &token);
20996 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20997 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
20998 }
20999 case PM_MULTI_WRITE_NODE: {
21000 parser_lex(parser);
21001 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21002 return node;
21003 }
21004 default:
21005 parser_lex(parser);
21006
21007 // In this case we have an operator but we don't know what it's for.
21008 // We need to treat it as an error. For now, we'll mark it as an error
21009 // and just skip right past it.
21010 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21011 return node;
21012 }
21013 }
21014 case PM_TOKEN_AMPERSAND_AMPERSAND:
21015 case PM_TOKEN_KEYWORD_AND: {
21016 parser_lex(parser);
21017
21018 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21019 return UP(pm_and_node_create(parser, node, &token, right));
21020 }
21021 case PM_TOKEN_KEYWORD_OR:
21022 case PM_TOKEN_PIPE_PIPE: {
21023 parser_lex(parser);
21024
21025 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21026 return UP(pm_or_node_create(parser, node, &token, right));
21027 }
21028 case PM_TOKEN_EQUAL_TILDE: {
21029 // Note that we _must_ parse the value before adding the local
21030 // variables in order to properly mirror the behavior of Ruby. For
21031 // example,
21032 //
21033 // /(?<foo>bar)/ =~ foo
21034 //
21035 // In this case, `foo` should be a method call and not a local yet.
21036 parser_lex(parser);
21037 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21038
21039 // By default, we're going to create a call node and then return it.
21040 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21041 pm_node_t *result = UP(call);
21042
21043 // If the receiver of this =~ is a regular expression node, then we
21044 // need to introduce local variables for it based on its named
21045 // capture groups.
21046 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21047 // It's possible to have an interpolated regular expression node
21048 // that only contains strings. This is because it can be split
21049 // up by a heredoc. In this case we need to concat the unescaped
21050 // strings together and then parse them as a regular expression.
21052
21053 bool interpolated = false;
21054 size_t total_length = 0;
21055
21056 pm_node_t *part;
21057 PM_NODE_LIST_FOREACH(parts, index, part) {
21058 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21059 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21060 } else {
21061 interpolated = true;
21062 break;
21063 }
21064 }
21065
21066 if (!interpolated && total_length > 0) {
21067 void *memory = xmalloc(total_length);
21068 if (!memory) abort();
21069
21070 uint8_t *cursor = memory;
21071 PM_NODE_LIST_FOREACH(parts, index, part) {
21072 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21073 size_t length = pm_string_length(unescaped);
21074
21075 memcpy(cursor, pm_string_source(unescaped), length);
21076 cursor += length;
21077 }
21078
21079 pm_string_t owned;
21080 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21081
21082 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21083 pm_string_free(&owned);
21084 }
21085 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21086 // If we have a regular expression node, then we can just parse
21087 // the named captures directly off the unescaped string.
21088 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21089 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21090 }
21091
21092 return result;
21093 }
21094 case PM_TOKEN_UAMPERSAND:
21095 case PM_TOKEN_USTAR:
21096 case PM_TOKEN_USTAR_STAR:
21097 // The only times this will occur are when we are in an error state,
21098 // but we'll put them in here so that errors can propagate.
21099 case PM_TOKEN_BANG_EQUAL:
21100 case PM_TOKEN_BANG_TILDE:
21101 case PM_TOKEN_EQUAL_EQUAL:
21102 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21103 case PM_TOKEN_LESS_EQUAL_GREATER:
21104 case PM_TOKEN_CARET:
21105 case PM_TOKEN_PIPE:
21106 case PM_TOKEN_AMPERSAND:
21107 case PM_TOKEN_GREATER_GREATER:
21108 case PM_TOKEN_LESS_LESS:
21109 case PM_TOKEN_MINUS:
21110 case PM_TOKEN_PLUS:
21111 case PM_TOKEN_PERCENT:
21112 case PM_TOKEN_SLASH:
21113 case PM_TOKEN_STAR:
21114 case PM_TOKEN_STAR_STAR: {
21115 parser_lex(parser);
21116 pm_token_t operator = parser->previous;
21117 switch (PM_NODE_TYPE(node)) {
21118 case PM_RESCUE_MODIFIER_NODE: {
21120 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21121 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21122 }
21123 break;
21124 }
21125 case PM_AND_NODE: {
21126 pm_and_node_t *cast = (pm_and_node_t *) node;
21127 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21128 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21129 }
21130 break;
21131 }
21132 case PM_OR_NODE: {
21133 pm_or_node_t *cast = (pm_or_node_t *) node;
21134 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21135 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21136 }
21137 break;
21138 }
21139 default:
21140 break;
21141 }
21142
21143 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21144 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21145 }
21146 case PM_TOKEN_GREATER:
21147 case PM_TOKEN_GREATER_EQUAL:
21148 case PM_TOKEN_LESS:
21149 case PM_TOKEN_LESS_EQUAL: {
21150 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21151 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21152 }
21153
21154 parser_lex(parser);
21155 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21156 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21157 }
21158 case PM_TOKEN_AMPERSAND_DOT:
21159 case PM_TOKEN_DOT: {
21160 parser_lex(parser);
21161 pm_token_t operator = parser->previous;
21162 pm_arguments_t arguments = { 0 };
21163
21164 // This if statement handles the foo.() syntax.
21165 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21166 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21167 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21168 }
21169
21170 switch (PM_NODE_TYPE(node)) {
21171 case PM_RESCUE_MODIFIER_NODE: {
21173 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21174 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21175 }
21176 break;
21177 }
21178 case PM_AND_NODE: {
21179 pm_and_node_t *cast = (pm_and_node_t *) node;
21180 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21181 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21182 }
21183 break;
21184 }
21185 case PM_OR_NODE: {
21186 pm_or_node_t *cast = (pm_or_node_t *) node;
21187 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21188 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21189 }
21190 break;
21191 }
21192 default:
21193 break;
21194 }
21195
21196 pm_token_t message;
21197
21198 switch (parser->current.type) {
21199 case PM_CASE_OPERATOR:
21200 case PM_CASE_KEYWORD:
21201 case PM_TOKEN_CONSTANT:
21202 case PM_TOKEN_IDENTIFIER:
21203 case PM_TOKEN_METHOD_NAME: {
21204 parser_lex(parser);
21205 message = parser->previous;
21206 break;
21207 }
21208 default: {
21209 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21210 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21211 }
21212 }
21213
21214 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21215 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21216
21217 if (
21218 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21219 arguments.arguments == NULL &&
21220 arguments.opening_loc.length == 0 &&
21221 match1(parser, PM_TOKEN_COMMA)
21222 ) {
21223 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21224 } else {
21225 return UP(call);
21226 }
21227 }
21228 case PM_TOKEN_DOT_DOT:
21229 case PM_TOKEN_DOT_DOT_DOT: {
21230 parser_lex(parser);
21231
21232 pm_node_t *right = NULL;
21233 if (token_begins_expression_p(parser->current.type)) {
21234 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21235 }
21236
21237 return UP(pm_range_node_create(parser, node, &token, right));
21238 }
21239 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21240 pm_token_t keyword = parser->current;
21241 parser_lex(parser);
21242
21243 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21244 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21245 }
21246 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21247 pm_token_t keyword = parser->current;
21248 parser_lex(parser);
21249
21250 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21251 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21252 }
21253 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21254 parser_lex(parser);
21255 pm_statements_node_t *statements = pm_statements_node_create(parser);
21256 pm_statements_node_body_append(parser, statements, node, true);
21257
21258 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21259 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21260 }
21261 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21262 parser_lex(parser);
21263 pm_statements_node_t *statements = pm_statements_node_create(parser);
21264 pm_statements_node_body_append(parser, statements, node, true);
21265
21266 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21267 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21268 }
21269 case PM_TOKEN_QUESTION_MARK: {
21270 context_push(parser, PM_CONTEXT_TERNARY);
21271 pm_node_list_t current_block_exits = { 0 };
21272 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21273
21274 pm_token_t qmark = parser->current;
21275 parser_lex(parser);
21276
21277 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21278
21279 if (parser->recovering) {
21280 // If parsing the true expression of this ternary resulted in a syntax
21281 // error that we can recover from, then we're going to put missing nodes
21282 // and tokens into the remaining places. We want to be sure to do this
21283 // before the `expect` function call to make sure it doesn't
21284 // accidentally move past a ':' token that occurs after the syntax
21285 // error.
21286 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21287 pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21288
21289 context_pop(parser);
21290 pop_block_exits(parser, previous_block_exits);
21291 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21292 }
21293
21294 accept1(parser, PM_TOKEN_NEWLINE);
21295 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21296
21297 pm_token_t colon = parser->previous;
21298 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21299
21300 context_pop(parser);
21301 pop_block_exits(parser, previous_block_exits);
21302 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21303 }
21304 case PM_TOKEN_COLON_COLON: {
21305 parser_lex(parser);
21306 pm_token_t delimiter = parser->previous;
21307
21308 switch (parser->current.type) {
21309 case PM_TOKEN_CONSTANT: {
21310 parser_lex(parser);
21311 pm_node_t *path;
21312
21313 if (
21314 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21315 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21316 ) {
21317 // If we have a constant immediately following a '::' operator, then
21318 // this can either be a constant path or a method call, depending on
21319 // what follows the constant.
21320 //
21321 // If we have parentheses, then this is a method call. That would
21322 // look like Foo::Bar().
21323 pm_token_t message = parser->previous;
21324 pm_arguments_t arguments = { 0 };
21325
21326 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21327 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21328 } else {
21329 // Otherwise, this is a constant path. That would look like Foo::Bar.
21330 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21331 }
21332
21333 // If this is followed by a comma then it is a multiple assignment.
21334 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21335 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21336 }
21337
21338 return path;
21339 }
21340 case PM_CASE_OPERATOR:
21341 case PM_CASE_KEYWORD:
21342 case PM_TOKEN_IDENTIFIER:
21343 case PM_TOKEN_METHOD_NAME: {
21344 parser_lex(parser);
21345 pm_token_t message = parser->previous;
21346
21347 // If we have an identifier following a '::' operator, then it is for
21348 // sure a method call.
21349 pm_arguments_t arguments = { 0 };
21350 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21351 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21352
21353 // If this is followed by a comma then it is a multiple assignment.
21354 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21355 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21356 }
21357
21358 return UP(call);
21359 }
21360 case PM_TOKEN_PARENTHESIS_LEFT: {
21361 // If we have a parenthesis following a '::' operator, then it is the
21362 // method call shorthand. That would look like Foo::(bar).
21363 pm_arguments_t arguments = { 0 };
21364 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21365
21366 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21367 }
21368 default: {
21369 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21370 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21371 }
21372 }
21373 }
21374 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21375 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21376 parser_lex(parser);
21377 accept1(parser, PM_TOKEN_NEWLINE);
21378
21379 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21380 context_pop(parser);
21381
21382 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21383 }
21384 case PM_TOKEN_BRACKET_LEFT: {
21385 parser_lex(parser);
21386
21387 pm_arguments_t arguments = { 0 };
21388 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21389
21390 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21391 pm_accepts_block_stack_push(parser, true);
21392 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21393 pm_accepts_block_stack_pop(parser);
21394 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21395 }
21396
21397 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21398
21399 // If we have a comma after the closing bracket then this is a multiple
21400 // assignment and we should parse the targets.
21401 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21402 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21403 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21404 }
21405
21406 // If we're at the end of the arguments, we can now check if there is a
21407 // block node that starts with a {. If there is, then we can parse it and
21408 // add it to the arguments.
21409 pm_block_node_t *block = NULL;
21410 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21411 block = parse_block(parser, (uint16_t) (depth + 1));
21412 pm_arguments_validate_block(parser, &arguments, block);
21413 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21414 block = parse_block(parser, (uint16_t) (depth + 1));
21415 }
21416
21417 if (block != NULL) {
21418 if (arguments.block != NULL) {
21419 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21420 if (arguments.arguments == NULL) {
21421 arguments.arguments = pm_arguments_node_create(parser);
21422 }
21423 pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
21424 }
21425
21426 arguments.block = UP(block);
21427 }
21428
21429 return UP(pm_call_node_aref_create(parser, node, &arguments));
21430 }
21431 case PM_TOKEN_KEYWORD_IN: {
21432 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21433 parser->pattern_matching_newlines = true;
21434
21435 pm_token_t operator = parser->current;
21436 parser->command_start = false;
21437 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21438 parser_lex(parser);
21439
21440 pm_constant_id_list_t captures = { 0 };
21441 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21442
21443 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21444
21445 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21446 }
21447 case PM_TOKEN_EQUAL_GREATER: {
21448 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21449 parser->pattern_matching_newlines = true;
21450
21451 pm_token_t operator = parser->current;
21452 parser->command_start = false;
21453 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21454 parser_lex(parser);
21455
21456 pm_constant_id_list_t captures = { 0 };
21457 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21458
21459 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21460
21461 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21462 }
21463 default:
21464 assert(false && "unreachable");
21465 return NULL;
21466 }
21467}
21468
21469#undef PM_PARSE_PATTERN_SINGLE
21470#undef PM_PARSE_PATTERN_TOP
21471#undef PM_PARSE_PATTERN_MULTI
21472
21477static inline bool
21478pm_call_node_command_p(const pm_call_node_t *node) {
21479 return (
21480 (node->opening_loc.length == 0) &&
21481 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21482 (node->arguments != NULL || node->block != NULL)
21483 );
21484}
21485
21494static pm_node_t *
21495parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21496 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21497 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21498 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21499 }
21500
21501 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21502
21503 switch (PM_NODE_TYPE(node)) {
21504 case PM_MISSING_NODE:
21505 // If we found a syntax error, then the type of node returned by
21506 // parse_expression_prefix is going to be a missing node.
21507 return node;
21508 case PM_PRE_EXECUTION_NODE:
21509 case PM_POST_EXECUTION_NODE:
21510 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21511 case PM_ALIAS_METHOD_NODE:
21512 case PM_MULTI_WRITE_NODE:
21513 case PM_UNDEF_NODE:
21514 // These expressions are statements, and cannot be followed by
21515 // operators (except modifiers).
21516 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21517 return node;
21518 }
21519 break;
21520 case PM_CALL_NODE:
21521 // If we have a call node, then we need to check if it looks like a
21522 // method call without parentheses that contains arguments. If it
21523 // does, then it has different rules for parsing infix operators,
21524 // namely that it only accepts composition (and/or) and modifiers
21525 // (if/unless/etc.).
21526 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21527 return node;
21528 }
21529 break;
21530 case PM_SYMBOL_NODE:
21531 // If we have a symbol node that is being parsed as a label, then we
21532 // need to immediately return, because there should never be an
21533 // infix operator following this node.
21534 if (pm_symbol_node_label_p(parser, node)) {
21535 return node;
21536 }
21537 break;
21538 default:
21539 break;
21540 }
21541
21542 // Otherwise we'll look and see if the next token can be parsed as an infix
21543 // operator. If it can, then we'll parse it using parse_expression_infix.
21544 pm_binding_powers_t current_binding_powers;
21545 pm_token_type_t current_token_type;
21546
21547 while (
21548 current_token_type = parser->current.type,
21549 current_binding_powers = pm_binding_powers[current_token_type],
21550 binding_power <= current_binding_powers.left &&
21551 current_binding_powers.binary
21552 ) {
21553 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21554
21555 switch (PM_NODE_TYPE(node)) {
21556 case PM_MULTI_WRITE_NODE:
21557 // Multi-write nodes are statements, and cannot be followed by
21558 // operators except modifiers.
21559 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21560 return node;
21561 }
21562 break;
21563 case PM_CLASS_VARIABLE_WRITE_NODE:
21564 case PM_CONSTANT_PATH_WRITE_NODE:
21565 case PM_CONSTANT_WRITE_NODE:
21566 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21567 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21568 case PM_LOCAL_VARIABLE_WRITE_NODE:
21569 // These expressions are statements, by virtue of the right-hand
21570 // side of their write being an implicit array.
21571 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21572 return node;
21573 }
21574 break;
21575 case PM_CALL_NODE:
21576 // These expressions are also statements, by virtue of the
21577 // right-hand side of the expression (i.e., the last argument to
21578 // the call node) being an implicit array.
21579 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21580 return node;
21581 }
21582 break;
21583 case PM_RESCUE_MODIFIER_NODE:
21584 // A rescue modifier whose handler is a one-liner pattern match
21585 // (=> or in) produces a statement. That means it cannot be
21586 // extended by operators above the modifier level.
21587 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21589 pm_node_t *rescue_expression = cast->rescue_expression;
21590
21591 if (PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE)) {
21592 return node;
21593 }
21594 }
21595 break;
21596 default:
21597 break;
21598 }
21599
21600 // If the operator is nonassoc and we should not be able to parse the
21601 // upcoming infix operator, break.
21602 if (current_binding_powers.nonassoc) {
21603 // If this is a non-assoc operator and we are about to parse the
21604 // exact same operator, then we need to add an error.
21605 if (match1(parser, current_token_type)) {
21606 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21607 break;
21608 }
21609
21610 // If this is an endless range, then we need to reject a couple of
21611 // additional operators because it violates the normal operator
21612 // precedence rules. Those patterns are:
21613 //
21614 // 1.. & 2
21615 // 1.. * 2
21616 //
21617 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21618 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21619 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21620 break;
21621 }
21622
21623 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21624 break;
21625 }
21626 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21627 break;
21628 }
21629 }
21630
21631 if (accepts_command_call) {
21632 // A command-style method call is only accepted on method chains.
21633 // Thus, we check whether the parsed node can continue method chains.
21634 // The method chain can continue if the parsed node is one of the following five kinds:
21635 // (1) index access: foo[1]
21636 // (2) attribute access: foo.bar
21637 // (3) method call with parenthesis: foo.bar(1)
21638 // (4) method call with a block: foo.bar do end
21639 // (5) constant path: foo::Bar
21640 switch (node->type) {
21641 case PM_CALL_NODE: {
21642 pm_call_node_t *cast = (pm_call_node_t *)node;
21643 if (
21644 // (1) foo[1]
21645 !(
21646 cast->call_operator_loc.length == 0 &&
21647 cast->message_loc.length > 0 &&
21648 parser->start[cast->message_loc.start] == '[' &&
21649 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
21650 ) &&
21651 // (2) foo.bar
21652 !(
21653 cast->call_operator_loc.length > 0 &&
21654 cast->arguments == NULL &&
21655 cast->block == NULL &&
21656 cast->opening_loc.length == 0
21657 ) &&
21658 // (3) foo.bar(1)
21659 !(
21660 cast->call_operator_loc.length > 0 &&
21661 cast->opening_loc.length > 0
21662 ) &&
21663 // (4) foo.bar do end
21664 !(
21665 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21666 )
21667 ) {
21668 accepts_command_call = false;
21669 }
21670 break;
21671 }
21672 // (5) foo::Bar
21673 case PM_CONSTANT_PATH_NODE:
21674 break;
21675 default:
21676 accepts_command_call = false;
21677 break;
21678 }
21679 }
21680
21681 if (context_terminator(parser->current_context->context, &parser->current)) {
21682 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
21683 if (
21684 !next_binding_powers.binary ||
21685 binding_power > next_binding_powers.left ||
21686 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
21687 ) {
21688 return node;
21689 }
21690 }
21691 }
21692
21693 return node;
21694}
21695
21700static pm_statements_node_t *
21701wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21702 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21703 if (statements == NULL) {
21704 statements = pm_statements_node_create(parser);
21705 }
21706
21707 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21708 pm_arguments_node_arguments_append(
21709 parser->arena,
21710 arguments,
21711 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21712 );
21713
21714 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21715 parser,
21716 arguments,
21717 pm_parser_constant_id_constant(parser, "print", 5)
21718 )), true);
21719 }
21720
21721 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21722 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21723 if (statements == NULL) {
21724 statements = pm_statements_node_create(parser);
21725 }
21726
21727 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21728 pm_arguments_node_arguments_append(
21729 parser->arena,
21730 arguments,
21731 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21732 );
21733
21734 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21735 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21736
21737 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21738 parser,
21739 pm_parser_constant_id_constant(parser, "$F", 2),
21740 UP(call)
21741 );
21742
21743 pm_statements_node_body_prepend(parser->arena, statements, UP(write));
21744 }
21745
21746 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21747 pm_arguments_node_arguments_append(
21748 parser->arena,
21749 arguments,
21750 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21751 );
21752
21753 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21754 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21755 pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
21756 parser,
21757 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21758 NULL,
21759 UP(pm_true_node_synthesized_create(parser))
21760 )));
21761
21762 pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
21763 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21764 }
21765
21766 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21767 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21768 parser,
21769 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21770 statements
21771 )), true);
21772
21773 statements = wrapped_statements;
21774 }
21775
21776 return statements;
21777}
21778
21782static pm_node_t *
21783parse_program(pm_parser_t *parser) {
21784 // If the current scope is NULL, then we want to push a new top level scope.
21785 // The current scope could exist in the event that we are parsing an eval
21786 // and the user has passed into scopes that already exist.
21787 if (parser->current_scope == NULL) {
21788 pm_parser_scope_push(parser, true);
21789 }
21790
21791 pm_node_list_t current_block_exits = { 0 };
21792 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21793
21794 parser_lex(parser);
21795 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21796
21797 if (statements != NULL && !parser->parsing_eval) {
21798 // If we have statements, then the top-level statement should be
21799 // explicitly checked as well. We have to do this here because
21800 // everywhere else we check all but the last statement.
21801 assert(statements->body.size > 0);
21802 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21803 }
21804
21805 pm_constant_id_list_t locals;
21806 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21807 pm_parser_scope_pop(parser);
21808
21809 // At the top level, see if we need to wrap the statements in a program
21810 // node with a while loop based on the options.
21812 statements = wrap_statements(parser, statements);
21813 } else {
21814 flush_block_exits(parser, previous_block_exits);
21815 }
21816
21817 // If this is an empty file, then we're still going to parse all of the
21818 // statements in order to gather up all of the comments and such. Here we'll
21819 // correct the location information.
21820 if (statements == NULL) {
21821 statements = pm_statements_node_create(parser);
21822 statements->base.location = (pm_location_t) { 0 };
21823 }
21824
21825 return UP(pm_program_node_create(parser, &locals, statements));
21826}
21827
21828/******************************************************************************/
21829/* External functions */
21830/******************************************************************************/
21831
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be NUL-terminated.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, or NULL if there is none. An
 *     empty `little` matches at `big`, mirroring BSD strnstr.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle trivially matches at the start of the haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Bail out before
    // computing the last candidate position, which would otherwise form a
    // pointer before the start of the buffer (undefined behavior).
    if (little_length > big_length) return NULL;

    const char *max = big + (big_length - little_length);

    for (; big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21851
21852#ifdef _WIN32
21853#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21854#else
21860static void
21861pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21862 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21863 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
21864 }
21865}
21866#endif
21867
21872static void
21873pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
21874 const char *switches = pm_strnstr(engine, " -", length);
21875 if (switches == NULL) return;
21876
21877 pm_options_t next_options = *options;
21878 options->shebang_callback(
21879 &next_options,
21880 (const uint8_t *) (switches + 1),
21881 length - ((size_t) (switches - engine)) - 1,
21882 options->shebang_callback_data
21883 );
21884
21885 size_t encoding_length;
21886 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
21887 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
21888 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21889 }
21890
21891 parser->command_line = next_options.command_line;
21892 parser->frozen_string_literal = next_options.frozen_string_literal;
21893}
21894
21899pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
21900 assert(arena != NULL);
21901 assert(source != NULL);
21902
21903 *parser = (pm_parser_t) {
21904 .arena = arena,
21905 .node_id = 0,
21906 .lex_state = PM_LEX_STATE_BEG,
21907 .enclosure_nesting = 0,
21908 .lambda_enclosure_nesting = -1,
21909 .brace_nesting = 0,
21910 .do_loop_stack = 0,
21911 .accepts_block_stack = 0,
21912 .lex_modes = {
21913 .index = 0,
21914 .stack = {{ .mode = PM_LEX_DEFAULT }},
21915 .current = &parser->lex_modes.stack[0],
21916 },
21917 .start = source,
21918 .end = source + size,
21919 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21920 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21921 .next_start = NULL,
21922 .heredoc_end = NULL,
21923 .data_loc = { 0 },
21924 .comment_list = { 0 },
21925 .magic_comment_list = { 0 },
21926 .warning_list = { 0 },
21927 .error_list = { 0 },
21928 .current_scope = NULL,
21929 .current_context = NULL,
21930 .encoding = PM_ENCODING_UTF_8_ENTRY,
21931 .encoding_changed_callback = NULL,
21932 .encoding_comment_start = source,
21933 .lex_callback = NULL,
21934 .filepath = { 0 },
21935 .constant_pool = { 0 },
21936 .line_offsets = { 0 },
21937 .integer_base = 0,
21938 .current_string = PM_STRING_EMPTY,
21939 .start_line = 1,
21940 .explicit_encoding = NULL,
21941 .command_line = 0,
21942 .parsing_eval = false,
21943 .partial_script = false,
21944 .command_start = true,
21945 .recovering = false,
21946 .encoding_locked = false,
21947 .encoding_changed = false,
21948 .pattern_matching_newlines = false,
21949 .in_keyword_arg = false,
21950 .current_block_exits = NULL,
21951 .semantic_token_seen = false,
21952 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
21953 .current_regular_expression_ascii_only = false,
21954 .warn_mismatched_indentation = true
21955 };
21956
21957 // Initialize the constant pool. We're going to completely guess as to the
21958 // number of constants that we'll need based on the size of the input. The
21959 // ratio we chose here is actually less arbitrary than you might think.
21960 //
21961 // We took ~50K Ruby files and measured the size of the file versus the
21962 // number of constants that were found in those files. Then we found the
21963 // average and standard deviation of the ratios of constants/bytesize. Then
21964 // we added 1.34 standard deviations to the average to get a ratio that
21965 // would fit 75% of the files (for a two-tailed distribution). This works
21966 // because there was about a 0.77 correlation and the distribution was
21967 // roughly normal.
21968 //
21969 // This ratio will need to change if we add more constants to the constant
21970 // pool for another node type.
21971 uint32_t constant_size = ((uint32_t) size) / 95;
21972 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
21973
21974 // Initialize the newline list. Similar to the constant pool, we're going to
21975 // guess at the number of newlines that we'll need based on the size of the
21976 // input.
21977 size_t newline_size = size / 22;
21978 pm_line_offset_list_init(&parser->line_offsets, newline_size < 4 ? 4 : newline_size);
21979
21980 // If options were provided to this parse, establish them here.
21981 if (options != NULL) {
21982 // filepath option
21983 parser->filepath = options->filepath;
21984
21985 // line option
21986 parser->start_line = options->line;
21987
21988 // encoding option
21989 size_t encoding_length = pm_string_length(&options->encoding);
21990 if (encoding_length > 0) {
21991 const uint8_t *encoding_source = pm_string_source(&options->encoding);
21992 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21993 }
21994
21995 // encoding_locked option
21996 parser->encoding_locked = options->encoding_locked;
21997
21998 // frozen_string_literal option
22000
22001 // command_line option
22002 parser->command_line = options->command_line;
22003
22004 // version option
22005 parser->version = options->version;
22006
22007 // partial_script
22008 parser->partial_script = options->partial_script;
22009
22010 // scopes option
22011 parser->parsing_eval = options->scopes_count > 0;
22012 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22013
22014 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22015 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22016 pm_parser_scope_push(parser, scope_index == 0);
22017
22018 // Scopes given from the outside are not allowed to have numbered
22019 // parameters.
22020 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22021
22022 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22023 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22024
22025 const uint8_t *source = pm_string_source(local);
22026 size_t length = pm_string_length(local);
22027
22028 void *allocated = xmalloc(length);
22029 if (allocated == NULL) continue;
22030
22031 memcpy(allocated, source, length);
22032 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22033 }
22034 }
22035 }
22036
22037 // Now that we have established the user-provided options, check if
22038 // a version was given and parse as the latest version otherwise.
22039 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22041 }
22042
22043 pm_accepts_block_stack_push(parser, true);
22044
22045 // Skip past the UTF-8 BOM if it exists.
22046 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22047 parser->current.end += 3;
22048 parser->encoding_comment_start += 3;
22049
22050 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22052 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22053 }
22054 }
22055
22056 // If the -x command line flag is set, or the first shebang of the file does
22057 // not include "ruby", then we'll search for a shebang that does include
22058 // "ruby" and start parsing from there.
22059 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22060
22061 // If the first two bytes of the source are a shebang, then we will do a bit
22062 // of extra processing.
22063 //
22064 // First, we'll indicate that the encoding comment is at the end of the
22065 // shebang. This means that when a shebang is present the encoding comment
22066 // can begin on the second line.
22067 //
22068 // Second, we will check if the shebang includes "ruby". If it does, then we
22069 // we will start parsing from there. We will also potentially warning the
22070 // user if there is a carriage return at the end of the shebang. We will
22071 // also potentially call the shebang callback if this is the main script to
22072 // allow the caller to parse the shebang and find any command-line options.
22073 // If the shebang does not include "ruby" and this is the main script being
22074 // parsed, then we will start searching the file for a shebang that does
22075 // contain "ruby" as if -x were passed on the command line.
22076 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22077 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22078
22079 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22080 const char *engine;
22081
22082 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22083 if (newline != NULL) {
22084 parser->encoding_comment_start = newline + 1;
22085
22086 if (options == NULL || options->main_script) {
22087 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22088 }
22089 }
22090
22091 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22092 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22093 }
22094
22095 search_shebang = false;
22096 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22097 search_shebang = true;
22098 }
22099 }
22100
22101 // Here we're going to find the first shebang that includes "ruby" and start
22102 // parsing from there.
22103 if (search_shebang) {
22104 // If a shebang that includes "ruby" is not found, then we're going to a
22105 // a load error to the list of errors on the parser.
22106 bool found_shebang = false;
22107
22108 // This is going to point to the start of each line as we check it.
22109 // We'll maintain a moving window looking at each line at they come.
22110 const uint8_t *cursor = parser->start;
22111
22112 // The newline pointer points to the end of the current line that we're
22113 // considering. If it is NULL, then we're at the end of the file.
22114 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22115
22116 while (newline != NULL) {
22117 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
22118
22119 cursor = newline + 1;
22120 newline = next_newline(cursor, parser->end - cursor);
22121
22122 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22123 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22124 const char *engine;
22125 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22126 found_shebang = true;
22127
22128 if (newline != NULL) {
22129 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22130 parser->encoding_comment_start = newline + 1;
22131 }
22132
22133 if (options != NULL && options->shebang_callback != NULL) {
22134 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22135 }
22136
22137 break;
22138 }
22139 }
22140 }
22141
22142 if (found_shebang) {
22143 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22144 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22145 } else {
22146 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22147 pm_line_offset_list_clear(&parser->line_offsets);
22148 }
22149 }
22150
22151 // The encoding comment can start after any amount of inline whitespace, so
22152 // here we'll advance it to the first non-inline-whitespace character so
22153 // that it is ready for future comparisons.
22154 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22155}
22156
22165
22169static inline void
22170pm_comment_list_free(pm_list_t *list) {
22171 pm_list_node_t *node, *next;
22172
22173 for (node = list->head; node != NULL; node = next) {
22174 next = node->next;
22175
22176 pm_comment_t *comment = (pm_comment_t *) node;
22177 xfree_sized(comment, sizeof(pm_comment_t));
22178 }
22179}
22180
22184static inline void
22185pm_magic_comment_list_free(pm_list_t *list) {
22186 pm_list_node_t *node, *next;
22187
22188 for (node = list->head; node != NULL; node = next) {
22189 next = node->next;
22190
22192 xfree_sized(magic_comment, sizeof(pm_magic_comment_t));
22193 }
22194}
22195
22201 pm_string_free(&parser->filepath);
22202 pm_diagnostic_list_free(&parser->error_list);
22203 pm_diagnostic_list_free(&parser->warning_list);
22204 pm_comment_list_free(&parser->comment_list);
22205 pm_magic_comment_list_free(&parser->magic_comment_list);
22206 pm_constant_pool_free(&parser->constant_pool);
22207 pm_line_offset_list_free(&parser->line_offsets);
22208
22209 while (parser->current_scope != NULL) {
22210 // Normally, popping the scope doesn't free the locals since it is
22211 // assumed that ownership has transferred to the AST. However if we have
22212 // scopes while we're freeing the parser, it's likely they came from
22213 // eval scopes and we need to free them explicitly here.
22214 pm_parser_scope_pop(parser);
22215 }
22216
22217 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22218 lex_mode_pop(parser);
22219 }
22220}
22221
22227 return parse_program(parser);
22228}
22229
22235static bool
22236pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22237#define LINE_SIZE 4096
22238 char line[LINE_SIZE];
22239
22240 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22241 size_t length = LINE_SIZE;
22242 while (length > 0 && line[length - 1] == '\n') length--;
22243
22244 if (length == LINE_SIZE) {
22245 // If we read a line that is the maximum size and it doesn't end
22246 // with a newline, then we'll just append it to the buffer and
22247 // continue reading.
22248 length--;
22249 pm_buffer_append_string(buffer, line, length);
22250 continue;
22251 }
22252
22253 // Append the line to the buffer.
22254 length--;
22255 pm_buffer_append_string(buffer, line, length);
22256
22257 // Check if the line matches the __END__ marker. If it does, then stop
22258 // reading and return false. In most circumstances, this means we should
22259 // stop reading from the stream so that the DATA constant can pick it
22260 // up.
22261 switch (length) {
22262 case 7:
22263 if (strncmp(line, "__END__", 7) == 0) return false;
22264 break;
22265 case 8:
22266 if (strncmp(line, "__END__\n", 8) == 0) return false;
22267 break;
22268 case 9:
22269 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22270 break;
22271 }
22272
22273 // All data should be read via gets. If the string returned by gets
22274 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22275 if (stream_feof(stream)) {
22276 break;
22277 }
22278 }
22279
22280 return true;
22281#undef LINE_SIZE
22282}
22283
22291pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22292 pm_buffer_init(buffer);
22293
22294 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22295
22296 pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22297 pm_node_t *node = pm_parse(parser);
22298
22299 while (!eof && parser->error_list.size > 0) {
22300 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22301
22302 pm_parser_free(parser);
22303 pm_arena_free(arena);
22304 pm_parser_init(arena, parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22305 node = pm_parse(parser);
22306 }
22307
22308 return node;
22309}
22310
22315pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22316 pm_options_t options = { 0 };
22317 pm_options_read(&options, data);
22318
22319 pm_arena_t arena = { 0 };
22320 pm_parser_t parser;
22321 pm_parser_init(&arena, &parser, source, size, &options);
22322
22323 pm_parse(&parser);
22324
22325 bool result = parser.error_list.size == 0;
22326 pm_parser_free(&parser);
22327 pm_arena_free(&arena);
22328 pm_options_free(&options);
22329
22330 return result;
22331}
22332
22333#undef PM_CASE_KEYWORD
22334#undef PM_CASE_OPERATOR
22335#undef PM_CASE_WRITABLE
22336#undef PM_STRING_EMPTY
22337
22338// We optionally support serializing to a binary string. For systems that don't
22339// want or need this functionality, it can be turned off with the
22340// PRISM_EXCLUDE_SERIALIZATION define.
22341#ifndef PRISM_EXCLUDE_SERIALIZATION
22342
22343static inline void
22344pm_serialize_header(pm_buffer_t *buffer) {
22345 pm_buffer_append_string(buffer, "PRISM", 5);
22346 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22347 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22348 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22349 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22350}
22351
22356pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22357 pm_serialize_header(buffer);
22358 pm_serialize_content(parser, node, buffer);
22359 pm_buffer_append_byte(buffer, '\0');
22360}
22361
22367pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22368 pm_options_t options = { 0 };
22369 pm_options_read(&options, data);
22370
22371 pm_arena_t arena = { 0 };
22372 pm_parser_t parser;
22373 pm_parser_init(&arena, &parser, source, size, &options);
22374
22375 pm_node_t *node = pm_parse(&parser);
22376
22377 pm_serialize_header(buffer);
22378 pm_serialize_content(&parser, node, buffer);
22379 pm_buffer_append_byte(buffer, '\0');
22380
22381 pm_parser_free(&parser);
22382 pm_arena_free(&arena);
22383 pm_options_free(&options);
22384}
22385
22391pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22392 pm_arena_t arena = { 0 };
22393 pm_parser_t parser;
22394 pm_options_t options = { 0 };
22395 pm_options_read(&options, data);
22396
22397 pm_buffer_t parser_buffer;
22398 pm_node_t *node = pm_parse_stream(&arena, &parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22399 pm_serialize_header(buffer);
22400 pm_serialize_content(&parser, node, buffer);
22401 pm_buffer_append_byte(buffer, '\0');
22402
22403 pm_buffer_free(&parser_buffer);
22404 pm_parser_free(&parser);
22405 pm_arena_free(&arena);
22406 pm_options_free(&options);
22407}
22408
22413pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22414 pm_options_t options = { 0 };
22415 pm_options_read(&options, data);
22416
22417 pm_arena_t arena = { 0 };
22418 pm_parser_t parser;
22419 pm_parser_init(&arena, &parser, source, size, &options);
22420
22421 pm_parse(&parser);
22422 pm_serialize_header(buffer);
22423 pm_serialize_encoding(parser.encoding, buffer);
22424 pm_buffer_append_varsint(buffer, parser.start_line);
22425 pm_serialize_comment_list(&parser.comment_list, buffer);
22426
22427 pm_parser_free(&parser);
22428 pm_arena_free(&arena);
22429 pm_options_free(&options);
22430}
22431
22432#endif
22433
22434/******************************************************************************/
22435/* Slice queries for the Ruby API */
22436/******************************************************************************/
22437
/** The category of slice returned from pm_slice_type. */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when no other type applies to the slice. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a single !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22455
22459pm_slice_type_t
22460pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22461 // first, get the right encoding object
22462 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22463 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22464
22465 // check that there is at least one character
22466 if (length == 0) return PM_SLICE_TYPE_NONE;
22467
22468 size_t width;
22469 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22470 // valid because alphabetical
22471 } else if (*source == '_') {
22472 // valid because underscore
22473 width = 1;
22474 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22475 // valid because multibyte
22476 } else {
22477 // invalid because no match
22478 return PM_SLICE_TYPE_NONE;
22479 }
22480
22481 // determine the type of the slice based on the first character
22482 const uint8_t *end = source + length;
22483 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22484
22485 // next, iterate through all of the bytes of the string to ensure that they
22486 // are all valid identifier characters
22487 source += width;
22488
22489 while (source < end) {
22490 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22491 // valid because alphanumeric
22492 source += width;
22493 } else if (*source == '_') {
22494 // valid because underscore
22495 source++;
22496 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22497 // valid because multibyte
22498 source += width;
22499 } else {
22500 // invalid because no match
22501 break;
22502 }
22503 }
22504
22505 // accept a ! or ? at the end of the slice as a method name
22506 if (*source == '!' || *source == '?' || *source == '=') {
22507 source++;
22508 result = PM_SLICE_TYPE_METHOD_NAME;
22509 }
22510
22511 // valid if we are at the end of the slice
22512 return source == end ? result : PM_SLICE_TYPE_NONE;
22513}
22514
22519pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22520 switch (pm_slice_type(source, length, encoding_name)) {
22521 case PM_SLICE_TYPE_ERROR:
22522 return PM_STRING_QUERY_ERROR;
22523 case PM_SLICE_TYPE_NONE:
22524 case PM_SLICE_TYPE_CONSTANT:
22525 case PM_SLICE_TYPE_METHOD_NAME:
22526 return PM_STRING_QUERY_FALSE;
22527 case PM_SLICE_TYPE_LOCAL:
22528 return PM_STRING_QUERY_TRUE;
22529 }
22530
22531 assert(false && "unreachable");
22532 return PM_STRING_QUERY_FALSE;
22533}
22534
22539pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22540 switch (pm_slice_type(source, length, encoding_name)) {
22541 case PM_SLICE_TYPE_ERROR:
22542 return PM_STRING_QUERY_ERROR;
22543 case PM_SLICE_TYPE_NONE:
22544 case PM_SLICE_TYPE_LOCAL:
22545 case PM_SLICE_TYPE_METHOD_NAME:
22546 return PM_STRING_QUERY_FALSE;
22547 case PM_SLICE_TYPE_CONSTANT:
22548 return PM_STRING_QUERY_TRUE;
22549 }
22550
22551 assert(false && "unreachable");
22552 return PM_STRING_QUERY_FALSE;
22553}
22554
22559pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22560#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22561#define C1(c) (*source == c)
22562#define C2(s) (memcmp(source, s, 2) == 0)
22563#define C3(s) (memcmp(source, s, 3) == 0)
22564
22565 switch (pm_slice_type(source, length, encoding_name)) {
22566 case PM_SLICE_TYPE_ERROR:
22567 return PM_STRING_QUERY_ERROR;
22568 case PM_SLICE_TYPE_NONE:
22569 break;
22570 case PM_SLICE_TYPE_LOCAL:
22571 // numbered parameters are not valid method names
22572 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22573 case PM_SLICE_TYPE_CONSTANT:
22574 // all constants are valid method names
22575 case PM_SLICE_TYPE_METHOD_NAME:
22576 // all method names are valid method names
22577 return PM_STRING_QUERY_TRUE;
22578 }
22579
22580 switch (length) {
22581 case 1:
22582 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22583 case 2:
22584 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22585 case 3:
22586 return B(C3("===") || C3("<=>") || C3("[]="));
22587 default:
22588 return PM_STRING_QUERY_FALSE;
22589 }
22590
22591#undef B
22592#undef C1
22593#undef C2
22594#undef C3
22595}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:218
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:202
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:182
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:228
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:234
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:107
@ PM_OPTIONS_VERSION_CRUBY_4_1
The vendored version of prism in CRuby 4.1.x.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:89
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:95
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:101
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:80
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:263
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:268
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:47
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:70
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:494
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:275
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:325
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:352
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:337
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:349
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:361
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition parser.h:310
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:307
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:388
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:340
@ PM_CONTEXT_CASE_IN
a case in statements
Definition parser.h:313
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:301
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:382
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:418
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:433
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:406
@ PM_CONTEXT_IF
an if statement
Definition parser.h:364
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:400
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:379
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:286
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:277
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:322
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:373
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:298
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:319
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:367
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:394
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:403
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:292
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:304
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:331
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement within a singleton class
Definition parser.h:427
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:412
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:343
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:391
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:289
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:436
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:334
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:358
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:409
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:283
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement within a singleton class
Definition parser.h:421
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:346
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:376
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:316
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:385
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:370
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:280
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:415
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:355
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:430
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:328
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement within a singleton class
Definition parser.h:424
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:397
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:295
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:439
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:564
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:520
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:452
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:356
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:352
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:360
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:368
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition defines.h:303
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:274
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:258
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22226
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22162
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:21899
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22200
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22291
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:272
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:280
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:274
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:277
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:109
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
Definition prism.h:116
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:358
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17371
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17373
const uint8_t * start
The start of the regular expression.
Definition prism.c:17376
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17387
const uint8_t * end
The end of the regular expression.
Definition prism.c:17379
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20304
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20315
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20306
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20312
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20309
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20322
AndNode.
Definition ast.h:1277
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1292
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1305
A bump allocator.
Definition pm_arena.h:39
ArgumentsNode.
Definition ast.h:1337
pm_node_t base
The embedded base node.
Definition ast.h:1339
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1349
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1659
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1670
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1673
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1661
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1664
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1667
ArrayNode.
Definition ast.h:1367
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1376
ArrayPatternNode.
Definition ast.h:1427
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1445
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1485
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1495
AssocNode.
Definition ast.h:1510
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1541
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1528
BeginNode.
Definition ast.h:1633
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1675
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1685
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1655
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1665
pm_node_t base
The embedded base node.
Definition ast.h:1635
This struct represents a set of binding powers used for a given token.
Definition prism.c:12173
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12181
pm_binding_power_t left
The left binding power.
Definition prism.c:12175
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:12187
pm_binding_power_t right
The right binding power.
Definition prism.c:12178
BlockLocalVariableNode.
Definition ast.h:1750
BlockNode.
Definition ast.h:1777
pm_location_t opening_loc
BlockNode::opening_loc.
Definition ast.h:1823
BlockParametersNode.
Definition ast.h:1905
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2129
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2190
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2210
pm_constant_id_t name
CallNode::name.
Definition ast.h:2170
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2200
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2223
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2160
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2180
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2233
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2147
CaseMatchNode.
Definition ast.h:2564
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2586
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2596
CaseNode.
Definition ast.h:2633
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2665
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2655
ClassVariableReadNode.
Definition ast.h:2922
ClassVariableTargetNode.
Definition ast.h:2950
ClassVariableWriteNode.
Definition ast.h:2972
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:462
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:470
pm_location_t location
The location of the comment in the source.
Definition parser.h:467
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3181
ConstantPathTargetNode.
Definition ast.h:3316
ConstantReadNode.
Definition ast.h:3409
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3437
ConstantWriteNode.
Definition ast.h:3459
This is a node in a linked list of contexts.
Definition parser.h:443
pm_context_t context
The context that this node represents.
Definition parser.h:445
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:448
ElseNode.
Definition ast.h:3635
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3647
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3730
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3742
FindPatternNode.
Definition ast.h:3788
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3852
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3800
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3865
FlipFlopNode.
Definition ast.h:3883
FloatNode.
Definition ast.h:3915
double value
FloatNode::value.
Definition ast.h:3924
pm_node_t base
The embedded base node.
Definition ast.h:3917
ForwardingParameterNode.
Definition ast.h:4048
GlobalVariableReadNode.
Definition ast.h:4210
GlobalVariableTargetNode.
Definition ast.h:4238
GlobalVariableWriteNode.
Definition ast.h:4260
HashNode.
Definition ast.h:4321
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4346
HashPatternNode.
Definition ast.h:4380
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4395
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4434
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4447
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:89
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:94
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:97
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:100
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:91
IfNode.
Definition ast.h:4468
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4527
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4546
ImaginaryNode.
Definition ast.h:4573
InNode.
Definition ast.h:4649
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4661
InstanceVariableReadNode.
Definition ast.h:5052
InstanceVariableTargetNode.
Definition ast.h:5080
InstanceVariableWriteNode.
Definition ast.h:5102
IntegerNode.
Definition ast.h:5169
pm_integer_t value
IntegerNode::value.
Definition ast.h:5178
pm_node_t base
The embedded base node.
Definition ast.h:5171
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5206
InterpolatedRegularExpressionNode.
Definition ast.h:5251
InterpolatedStringNode.
Definition ast.h:5287
pm_node_t base
The embedded base node.
Definition ast.h:5289
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5294
InterpolatedSymbolNode.
Definition ast.h:5319
InterpolatedXStringNode.
Definition ast.h:5351
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5358
pm_node_t base
The embedded base node.
Definition ast.h:5353
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5363
KeywordHashNode.
Definition ast.h:5420
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:516
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:510
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:110
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:166
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:154
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:157
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:163
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:172
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:234
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:250
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:255
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:209
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:240
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:247
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:530
pm_constant_id_t name
The name of the local variable.
Definition parser.h:532
pm_location_t location
The location of the local variable in the source.
Definition parser.h:535
uint32_t hash
The hash of the local variable.
Definition parser.h:544
uint32_t index
The index of the local variable in the local table.
Definition parser.h:538
uint32_t reads
The number of times the local variable is read.
Definition parser.h:541
LocalVariableReadNode.
Definition ast.h:5656
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5686
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5673
LocalVariableTargetNode.
Definition ast.h:5704
LocalVariableWriteNode.
Definition ast.h:5731
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5757
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5744
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:552
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:560
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:557
uint32_t size
The number of local variables in the set.
Definition parser.h:554
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:540
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:542
uint32_t length
The length of the location.
Definition ast.h:545
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:479
MatchLastLineNode.
Definition ast.h:5822
MatchWriteNode.
Definition ast.h:5977
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5989
MissingNode.
Definition ast.h:6001
MultiTargetNode.
Definition ast.h:6070
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6127
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6087
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6137
MultiWriteNode.
Definition ast.h:6152
A list of nodes in the source, most often used for lists of children.
Definition ast.h:553
size_t size
The number of nodes in the list.
Definition ast.h:555
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:561
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1051
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1056
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1074
OptionalParameterNode.
Definition ast.h:6446
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:113
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:162
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:124
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:178
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:185
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:139
int32_t line
The line within the file that the parse starts on.
Definition options.h:133
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:118
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:171
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:195
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:144
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:127
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:159
OrNode.
Definition ast.h:6483
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6511
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6498
ParametersNode.
Definition ast.h:6537
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6574
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6554
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6569
ParenthesesNode.
Definition ast.h:6592
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6599
This struct represents the overall parser.
Definition parser.h:638
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:841
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:650
uint8_t command_line
The command line flags given from the options.
Definition parser.h:860
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:756
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:883
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:910
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:695
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:889
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:798
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:931
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:787
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:913
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:708
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:750
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:722
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:659
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:775
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:857
pm_token_t previous
The previous token we were considering.
Definition parser.h:698
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:804
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:876
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:925
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:904
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:729
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:741
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:692
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:653
pm_line_offset_list_t line_offsets
This is the list of line offsets in the source file.
Definition parser.h:790
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:735
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:870
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:854
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:769
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:685
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:732
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:716
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:665
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:763
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:810
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:897
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:682
pm_arena_t * arena
The arena used for all AST-lifetime allocations.
Definition parser.h:640
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:719
size_t index
The current index into the lexer mode stack.
Definition parser.h:688
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:781
pm_scope_t * current_scope
The current local scope.
Definition parser.h:738
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:886
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a comment or whitespace).
Definition parser.h:919
RangeNode.
Definition ast.h:6822
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6851
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6837
RationalNode.
Definition ast.h:6879
pm_node_t base
The embedded base node.
Definition ast.h:6881
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6890
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9462
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9467
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9464
RegularExpressionNode.
Definition ast.h:6944
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6966
RequiredParameterNode.
Definition ast.h:7016
RescueModifierNode.
Definition ast.h:7038
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7055
RescueNode.
Definition ast.h:7075
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7112
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7102
This struct represents a node in a linked list of scopes.
Definition parser.h:578
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:580
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:591
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:618
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:583
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:612
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:624
SplatNode.
Definition ast.h:7365
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7377
StatementsNode.
Definition ast.h:7392
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7399
pm_node_t base
The embedded base node.
Definition ast.h:7394
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of potential issues.
StringNode.
Definition ast.h:7426
pm_node_t base
The embedded base node.
Definition ast.h:7428
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7448
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7438
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7443
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7433
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@105 type
The type of the string.
SymbolNode.
Definition ast.h:7520
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7532
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7542
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9436
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9441
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9447
This struct represents a token in the Ruby source.
Definition ast.h:521
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:529
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:526
pm_token_type_t type
The type of the token.
Definition ast.h:523
UndefNode.
Definition ast.h:7574
UnlessNode.
Definition ast.h:7604
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7653
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7663
WhenNode.
Definition ast.h:7738
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7760
XStringNode.
Definition ast.h:7827