Ruby 4.1.0dev (2026-03-04 revision 75d6b159eba73d4effe5989052f621177423e9db)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * NOTE(review): presumably the number of columns that a tab character counts
 * for when measuring whitespace -- confirm against the call sites.
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum and maximum of two values. Each argument may be evaluated more than
// once, so do not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Helpful AST-related macros */
23/******************************************************************************/
24
/** Cast the given value to a uint32_t. */
#define U32(value_) ((uint32_t) (value_))

/** Short aliases for the node flags and node upcast macros. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

// A location is stored as a start offset and a length. These macros read the
// start offset and compute the end offset of a location given by pointer.
#define PM_LOCATION_START(location_) \
    ((location_)->start)
#define PM_LOCATION_END(location_) \
    ((location_)->start + (location_)->length)

// Tokens hold start and end pointers. Offsets are computed relative to the
// start pointer of the parser and narrowed to 32 bits.
#define PM_TOKEN_START(parser_, token_) \
    U32((token_)->start - (parser_)->start)
#define PM_TOKEN_END(parser_, token_) \
    U32((token_)->end - (parser_)->start)
#define PM_TOKEN_LENGTH(token_) \
    U32((token_)->end - (token_)->start)
#define PM_TOKENS_LENGTH(left_, right_) \
    U32((right_)->end - (left_)->start)

// Start offset, length, and end offset of a node, read through its upcast
// location. PM_NODES_LENGTH spans from the start of the left node to the end
// of the right node.
#define PM_NODE_START(node_) \
    (UP(node_)->location.start)
#define PM_NODE_LENGTH(node_) \
    (UP(node_)->location.length)
#define PM_NODE_END(node_) \
    (PM_NODE_START(node_) + PM_NODE_LENGTH(node_))
#define PM_NODES_LENGTH(left_, right_) \
    (PM_NODE_END(right_) - PM_NODE_START(left_))

// Lengths of spans that begin with a token and end with a node, or begin with
// a node and end with a token.
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) \
    (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) \
    (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))

// Setters for the start offset and length of a node. The length setters
// compute the new length from the node's current start offset, so set the
// start first when both change.
#define PM_NODE_START_SET_NODE(left_, right_) \
    (PM_NODE_START(left_) = PM_NODE_START(right_))
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) \
    (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
#define PM_NODE_LENGTH_SET_NODE(left_, right_) \
    (PM_NODE_LENGTH(left_) = PM_NODES_LENGTH(left_, right_))
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) \
    (PM_NODE_LENGTH(node_) = PM_NODE_TOKEN_LENGTH(parser_, node_, token_))
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) \
    (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))

// Builders for pm_location_t values.
#define PM_LOCATION_INIT(start_, length_) \
    ((pm_location_t) { .start = (start_), .length = (length_) })
#define PM_LOCATION_INIT_UNSET \
    PM_LOCATION_INIT(0, 0)
#define PM_LOCATION_INIT_TOKEN(parser_, token_) \
    PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
#define PM_LOCATION_INIT_NODE(node_) \
    UP(node_)->location

// Builders for pm_location_t values that span two tokens, two nodes, or one
// of each.
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) \
    PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
#define PM_LOCATION_INIT_NODES(left_, right_) \
    PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) \
    PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) \
    PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

// Token conversions. NTOK2LOC yields the unset location for a NULL token
// pointer. NTOK2PTR yields NULL for a token value whose start is NULL, and
// the address of the token otherwise.
#define TOK2LOC(parser_, token_) \
    PM_LOCATION_INIT_TOKEN(parser_, token_)
#define NTOK2LOC(parser_, token_) \
    ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
#define NTOK2PTR(token_) \
    ((token_).start == NULL ? NULL : &(token_))
65
66/******************************************************************************/
67/* Lex mode manipulations */
68/******************************************************************************/
69
/**
 * Return the incrementor character for the given opening delimiter.
 *
 * The opening brackets `(`, `[`, `{` and `<` are their own incrementor. Any
 * other delimiter has no incrementor, which is reported as `\0`.
 *
 * @param start The opening delimiter.
 * @return The incrementor character, or `\0` if there is none.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
86
/**
 * Return the terminator character that matches the given opening delimiter.
 *
 * Opening brackets map to their closing counterpart. Every other delimiter
 * terminates itself.
 *
 * @param start The opening delimiter.
 * @return The matching terminator character.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
106
112static bool
113lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
114 lex_mode.prev = parser->lex_modes.current;
115 parser->lex_modes.index++;
116
117 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
118 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
119 if (parser->lex_modes.current == NULL) return false;
120
121 *parser->lex_modes.current = lex_mode;
122 } else {
123 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
124 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
125 }
126
127 return true;
128}
129
133static inline bool
134lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
135 uint8_t incrementor = lex_mode_incrementor(delimiter);
136 uint8_t terminator = lex_mode_terminator(delimiter);
137
138 pm_lex_mode_t lex_mode = {
139 .mode = PM_LEX_LIST,
140 .as.list = {
141 .nesting = 0,
142 .interpolation = interpolation,
143 .incrementor = incrementor,
144 .terminator = terminator
145 }
146 };
147
148 // These are the places where we need to split up the content of the list.
149 // We'll use strpbrk to find the first of these characters.
150 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
151 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
152 size_t index = 7;
153
154 // Now we'll add the terminator to the list of breakpoints. If the
155 // terminator is not already a NULL byte, add it to the list.
156 if (terminator != '\0') {
157 breakpoints[index++] = terminator;
158 }
159
160 // If interpolation is allowed, then we're going to check for the #
161 // character. Otherwise we'll only look for escapes and the terminator.
162 if (interpolation) {
163 breakpoints[index++] = '#';
164 }
165
166 // If there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
180static inline bool
181lex_mode_push_list_eof(pm_parser_t *parser) {
182 return lex_mode_push_list(parser, false, '\0');
183}
184
188static inline bool
189lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
190 pm_lex_mode_t lex_mode = {
191 .mode = PM_LEX_REGEXP,
192 .as.regexp = {
193 .nesting = 0,
194 .incrementor = incrementor,
195 .terminator = terminator
196 }
197 };
198
199 // These are the places where we need to split up the content of the
200 // regular expression. We'll use strpbrk to find the first of these
201 // characters.
202 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
203 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
204 size_t index = 4;
205
206 // First we'll add the terminator.
207 if (terminator != '\0') {
208 breakpoints[index++] = terminator;
209 }
210
211 // Next, if there is an incrementor, then we'll check for that as well.
212 if (incrementor != '\0') {
213 breakpoints[index++] = incrementor;
214 }
215
216 parser->explicit_encoding = NULL;
217 return lex_mode_push(parser, lex_mode);
218}
219
223static inline bool
224lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
225 pm_lex_mode_t lex_mode = {
226 .mode = PM_LEX_STRING,
227 .as.string = {
228 .nesting = 0,
229 .interpolation = interpolation,
230 .label_allowed = label_allowed,
231 .incrementor = incrementor,
232 .terminator = terminator
233 }
234 };
235
236 // These are the places where we need to split up the content of the
237 // string. We'll use strpbrk to find the first of these characters.
238 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
239 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
240 size_t index = 3;
241
242 // Now add in the terminator. If the terminator is not already a NULL byte,
243 // then we'll add it.
244 if (terminator != '\0') {
245 breakpoints[index++] = terminator;
246 }
247
248 // If interpolation is allowed, then we're going to check for the #
249 // character. Otherwise we'll only look for escapes and the terminator.
250 if (interpolation) {
251 breakpoints[index++] = '#';
252 }
253
254 // If we have an incrementor, then we'll add that in as a breakpoint as
255 // well.
256 if (incrementor != '\0') {
257 breakpoints[index++] = incrementor;
258 }
259
260 parser->explicit_encoding = NULL;
261 return lex_mode_push(parser, lex_mode);
262}
263
269static inline bool
270lex_mode_push_string_eof(pm_parser_t *parser) {
271 return lex_mode_push_string(parser, false, false, '\0', '\0');
272}
273
279static void
280lex_mode_pop(pm_parser_t *parser) {
281 if (parser->lex_modes.index == 0) {
282 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
283 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
284 parser->lex_modes.index--;
285 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
286 } else {
287 parser->lex_modes.index--;
288 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
289 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
290 parser->lex_modes.current = prev;
291 }
292}
293
297static inline bool
298lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
299 return parser->lex_state & state;
300}
301
/**
 * The result of lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the other two cases applies. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /**
     * Returned in the BEG, CLASS, FNAME and DOT states, unless the LABELED
     * state is also set.
     */
    PM_IGNORED_NEWLINE_ALL = 1,

    /**
     * Returned when the state, disregarding LABEL, is exactly ARG together
     * with LABELED.
     */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
307
308static inline pm_ignored_newline_type_t
309lex_state_ignored_p(pm_parser_t *parser) {
310 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
311
312 if (ignored) {
313 return PM_IGNORED_NEWLINE_ALL;
314 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
315 return PM_IGNORED_NEWLINE_PATTERN;
316 } else {
317 return PM_IGNORED_NEWLINE_NONE;
318 }
319}
320
321static inline bool
322lex_state_beg_p(pm_parser_t *parser) {
323 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
324}
325
326static inline bool
327lex_state_arg_p(pm_parser_t *parser) {
328 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
329}
330
331static inline bool
332lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
333 if (parser->current.end >= parser->end) {
334 return false;
335 }
336 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
337}
338
339static inline bool
340lex_state_end_p(pm_parser_t *parser) {
341 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
342}
343
347static inline bool
348lex_state_operator_p(pm_parser_t *parser) {
349 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
350}
351
356static inline void
357lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
358 parser->lex_state = state;
359}
360
#ifndef PM_DEBUG_LOGGING
/**
 * Whether lex state transitions are logged to stderr. Disabled unless the
 * build defines it to a non-zero value.
 */
#define PM_DEBUG_LOGGING 0
#endif

#if PM_DEBUG_LOGGING
/**
 * Print the current lex state of the parser to stderr as a line of the form
 * "STATE: A|B|C", or "STATE: NONE" if the state is PM_LEX_STATE_NONE.
 */
PRISM_ATTRIBUTE_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Pair every state with its own spelling, in the order they are printed.
#define STATE_ENTRY(state) { state, #state }
    const struct {
        pm_lex_state_t state;
        const char *name;
    } states[] = {
        STATE_ENTRY(PM_LEX_STATE_BEG),
        STATE_ENTRY(PM_LEX_STATE_END),
        STATE_ENTRY(PM_LEX_STATE_ENDARG),
        STATE_ENTRY(PM_LEX_STATE_ENDFN),
        STATE_ENTRY(PM_LEX_STATE_ARG),
        STATE_ENTRY(PM_LEX_STATE_CMDARG),
        STATE_ENTRY(PM_LEX_STATE_MID),
        STATE_ENTRY(PM_LEX_STATE_FNAME),
        STATE_ENTRY(PM_LEX_STATE_DOT),
        STATE_ENTRY(PM_LEX_STATE_CLASS),
        STATE_ENTRY(PM_LEX_STATE_LABEL),
        STATE_ENTRY(PM_LEX_STATE_LABELED),
        STATE_ENTRY(PM_LEX_STATE_FITEM)
    };
#undef STATE_ENTRY

    bool first = true;

    for (size_t index = 0; index < sizeof(states) / sizeof(states[0]); index++) {
        if (!(parser->lex_state & states[index].state)) continue;

        if (!first) fprintf(stderr, "|");
        fprintf(stderr, "%s", states[index].name);
        first = false;
    }

    fprintf(stderr, "\n");
}

/**
 * Set the lex state of the parser, logging the caller together with the
 * state before and after the change to stderr.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    // This still refers to the function, the macro is only defined below.
    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

// From here on, route every lex_state_set call through the logging variant.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
418
419/******************************************************************************/
420/* Command-line macro helpers */
421/******************************************************************************/
422
/**
 * Mask the command line options of the parser with the given option. The
 * result is non-zero if the option is set.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
443
444/******************************************************************************/
445/* Diagnostic-related functions */
446/******************************************************************************/
447
451static inline void
452pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
453 pm_diagnostic_list_append(&parser->error_list, start, length, diag_id);
454}
455
460static inline void
461pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
463}
464
469static inline void
470pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
471 pm_parser_err_token(parser, &parser->current, diag_id);
472}
473
478static inline void
479pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
480 pm_parser_err_token(parser, &parser->previous, diag_id);
481}
482
487static inline void
488pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
489 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
490}
491
/**
 * Append an error to the error list of the parser. The trailing arguments
 * are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append an error to the error list of the parser, using the location of the
 * given node. The trailing arguments are forwarded as format arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)

/**
 * Append an error to the error list of the parser, using the location of the
 * given node. The length of the node and a pointer to its source content are
 * passed as the format arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))

/**
 * Append an error to the error list of the parser, using the location of the
 * given token. The trailing arguments are forwarded as format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append an error to the error list of the parser, using the location of the
 * given token. The length of the token and a pointer to its source content
 * are passed as the format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
525
529static inline void
530pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
531 pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id);
532}
533
538static inline void
539pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
540 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
541}
542
547static inline void
548pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
549 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
550}
551
/**
 * Append a warning to the warning list of the parser. The trailing arguments
 * are forwarded to pm_diagnostic_list_append_format.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format( \
        &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__ \
    )

/**
 * Append a warning to the warning list of the parser, using the location of
 * the given token. The trailing arguments are forwarded as format arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__ \
    )

/**
 * Append a warning to the warning list of the parser, using the location of
 * the given token. The length of the token and a pointer to its source
 * content are passed as the format arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT( \
        parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start \
    )

/**
 * Append a warning to the warning list of the parser, using the location of
 * the given node. The trailing arguments are forwarded as format arguments.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT( \
        parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__ \
    )
579
585static void
586pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
587 PM_PARSER_ERR_FORMAT(
588 parser,
589 U32(ident_start - parser->start),
590 U32(ident_length),
591 PM_ERR_HEREDOC_TERM,
592 (int) ident_length,
593 (const char *) ident_start
594 );
595}
596
597/******************************************************************************/
598/* Scope-related functions */
599/******************************************************************************/
600
604static bool
605pm_parser_scope_push(pm_parser_t *parser, bool closed) {
606 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
607 if (scope == NULL) return false;
608
609 *scope = (pm_scope_t) {
610 .previous = parser->current_scope,
611 .locals = { 0 },
612 .parameters = PM_SCOPE_PARAMETERS_NONE,
613 .implicit_parameters = { 0 },
614 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
615 .closed = closed
616 };
617
618 parser->current_scope = scope;
619 return true;
620}
621
626static bool
627pm_parser_scope_toplevel_p(pm_parser_t *parser) {
628 pm_scope_t *scope = parser->current_scope;
629
630 do {
631 if (scope->previous == NULL) return true;
632 if (scope->closed) return false;
633 } while ((scope = scope->previous) != NULL);
634
635 assert(false && "unreachable");
636 return true;
637}
638
642static pm_scope_t *
643pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
644 pm_scope_t *scope = parser->current_scope;
645
646 while (depth-- > 0) {
647 assert(scope != NULL);
648 scope = scope->previous;
649 }
650
651 return scope;
652}
653
/**
 * The result of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /**
     * A closed scope has the parameter, and no scope searched before it
     * does.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /**
     * A closed scope has the parameter, but a scope searched before it has
     * it as well.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** The search ended without finding a closed scope with the parameter. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
659
660static pm_scope_forwarding_param_check_result_t
661pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
662 pm_scope_t *scope = parser->current_scope;
663 bool conflict = false;
664
665 while (scope != NULL) {
666 if (scope->parameters & mask) {
667 if (scope->closed) {
668 if (conflict) {
669 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
670 } else {
671 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
672 }
673 }
674
675 conflict = true;
676 }
677
678 if (scope->closed) break;
679 scope = scope->previous;
680 }
681
682 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
683}
684
685static void
686pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
687 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
688 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
689 // Pass.
690 break;
691 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
692 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
693 break;
694 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
695 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
696 break;
697 }
698}
699
700static void
701pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
702 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
703 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
704 // Pass.
705 break;
706 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
707 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
708 break;
709 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
710 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
711 break;
712 }
713}
714
715static void
716pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
717 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
718 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
719 // Pass.
720 break;
721 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
722 // This shouldn't happen, because ... is not allowed in the
723 // declaration of blocks. If we get here, we assume we already have
724 // an error for this.
725 break;
726 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
727 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
728 break;
729 }
730}
731
732static void
733pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
734 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
735 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
736 // Pass.
737 break;
738 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
739 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
740 break;
741 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
742 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
743 break;
744 }
745}
746
751pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
752 return parser->current_scope->shareable_constant;
753}
754
759static void
760pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
761 pm_scope_t *scope = parser->current_scope;
762
763 do {
764 scope->shareable_constant = shareable_constant;
765 } while (!scope->closed && (scope = scope->previous) != NULL);
766}
767
768/******************************************************************************/
769/* Local variable-related functions */
770/******************************************************************************/
771
/**
 * The capacity at which a set of locals switches from being a list to being
 * a hash. Sets with a capacity below this value are searched linearly.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
776
777static void
778pm_locals_free(pm_locals_t *locals) {
779 if (locals->capacity > 0) {
780 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
781 }
782}
783
788static uint32_t
789pm_locals_hash(pm_constant_id_t name) {
790 name = ((name >> 16) ^ name) * 0x45d9f3b;
791 name = ((name >> 16) ^ name) * 0x45d9f3b;
792 name = (name >> 16) ^ name;
793 return name;
794}
795
800static void
801pm_locals_resize(pm_locals_t *locals) {
802 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
803 assert(next_capacity > locals->capacity);
804
805 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
806 if (next_locals == NULL) abort();
807
808 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
809 if (locals->size > 0) {
810 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
811 }
812 } else {
813 // If we just switched from a list to a hash, then we need to fill in
814 // the hash values of all of the locals.
815 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
816 uint32_t mask = next_capacity - 1;
817
818 for (uint32_t index = 0; index < locals->capacity; index++) {
819 pm_local_t *local = &locals->locals[index];
820
821 if (local->name != PM_CONSTANT_ID_UNSET) {
822 if (hash_needed) local->hash = pm_locals_hash(local->name);
823
824 uint32_t hash = local->hash;
825 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
826 next_locals[hash & mask] = *local;
827 }
828 }
829 }
830
831 pm_locals_free(locals);
832 locals->locals = next_locals;
833 locals->capacity = next_capacity;
834}
835
851static bool
852pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
853 if (locals->size >= (locals->capacity / 4 * 3)) {
854 pm_locals_resize(locals);
855 }
856
857 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
858 for (uint32_t index = 0; index < locals->capacity; index++) {
859 pm_local_t *local = &locals->locals[index];
860
861 if (local->name == PM_CONSTANT_ID_UNSET) {
862 *local = (pm_local_t) {
863 .name = name,
864 .location = { .start = start, .length = length },
865 .index = locals->size++,
866 .reads = reads,
867 .hash = 0
868 };
869 return true;
870 } else if (local->name == name) {
871 return false;
872 }
873 }
874 } else {
875 uint32_t mask = locals->capacity - 1;
876 uint32_t hash = pm_locals_hash(name);
877 uint32_t initial_hash = hash;
878
879 do {
880 pm_local_t *local = &locals->locals[hash & mask];
881
882 if (local->name == PM_CONSTANT_ID_UNSET) {
883 *local = (pm_local_t) {
884 .name = name,
885 .location = { .start = start, .length = length },
886 .index = locals->size++,
887 .reads = reads,
888 .hash = initial_hash
889 };
890 return true;
891 } else if (local->name == name) {
892 return false;
893 } else {
894 hash++;
895 }
896 } while ((hash & mask) != initial_hash);
897 }
898
899 assert(false && "unreachable");
900 return true;
901}
902
907static uint32_t
908pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
909 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
910 for (uint32_t index = 0; index < locals->size; index++) {
911 pm_local_t *local = &locals->locals[index];
912 if (local->name == name) return index;
913 }
914 } else {
915 uint32_t mask = locals->capacity - 1;
916 uint32_t hash = pm_locals_hash(name);
917 uint32_t initial_hash = hash & mask;
918
919 do {
920 pm_local_t *local = &locals->locals[hash & mask];
921
922 if (local->name == PM_CONSTANT_ID_UNSET) {
923 return UINT32_MAX;
924 } else if (local->name == name) {
925 return hash & mask;
926 } else {
927 hash++;
928 }
929 } while ((hash & mask) != initial_hash);
930 }
931
932 return UINT32_MAX;
933}
934
939static void
940pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
941 uint32_t index = pm_locals_find(locals, name);
942 assert(index != UINT32_MAX);
943
944 pm_local_t *local = &locals->locals[index];
945 assert(local->reads < UINT32_MAX);
946
947 local->reads++;
948}
949
954static void
955pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
956 uint32_t index = pm_locals_find(locals, name);
957 assert(index != UINT32_MAX);
958
959 pm_local_t *local = &locals->locals[index];
960 assert(local->reads > 0);
961
962 local->reads--;
963}
964
968static uint32_t
969pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
970 uint32_t index = pm_locals_find(locals, name);
971 assert(index != UINT32_MAX);
972
973 return locals->locals[index].reads;
974}
975
984static void
985pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
986 pm_constant_id_list_init_capacity(list, locals->size);
987
988 // If we're still below the threshold for switching to a hash, then we only
989 // need to loop over the locals until we hit the size because the locals are
990 // stored in a list.
991 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
992
993 // We will only warn for unused variables if we're not at the top level, or
994 // if we're parsing a file outside of eval or -e.
995 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
996
997 for (uint32_t index = 0; index < capacity; index++) {
998 pm_local_t *local = &locals->locals[index];
999
1000 if (local->name != PM_CONSTANT_ID_UNSET) {
1001 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1002
1003 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1004 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1005
1006 if (constant->length >= 1 && *constant->start != '_') {
1007 PM_PARSER_WARN_FORMAT(
1008 parser,
1009 local->location.start,
1010 local->location.length,
1011 PM_WARN_UNUSED_LOCAL_VARIABLE,
1012 (int) constant->length,
1013 (const char *) constant->start
1014 );
1015 }
1016 }
1017 }
1018 }
1019}
1020
1021/******************************************************************************/
1022/* Node-related functions */
1023/******************************************************************************/
1024
1028static inline pm_constant_id_t
1029pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1030 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
1031}
1032
1036static inline pm_constant_id_t
1037pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1038 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1039}
1040
1044static inline pm_constant_id_t
1045pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1046 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1047}
1048
1052static inline pm_constant_id_t
1053pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1054 return pm_parser_constant_id_raw(parser, token->start, token->end);
1055}
1056
/**
 * The node types that pm_check_value_expression treats as void values. This
 * is meant to be used directly after the `case` keyword and directly before
 * the colon of a case label, which is why the first entry has no `case` and
 * the last entry has no colon.
 */
#define PM_CASE_VOID_VALUE \
    PM_RETURN_NODE: \
    case PM_BREAK_NODE: \
    case PM_NEXT_NODE: \
    case PM_REDO_NODE: \
    case PM_RETRY_NODE: \
    case PM_MATCH_REQUIRED_NODE
1063
1069static pm_node_t *
1070pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1071 pm_node_t *void_node = NULL;
1072
1073 while (node != NULL) {
1074 switch (PM_NODE_TYPE(node)) {
1075 case PM_CASE_VOID_VALUE:
1076 return void_node != NULL ? void_node : node;
1077 case PM_MATCH_PREDICATE_NODE:
1078 return NULL;
1079 case PM_BEGIN_NODE: {
1080 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1081
1082 if (cast->ensure_clause != NULL) {
1083 if (cast->rescue_clause != NULL) {
1084 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1085 if (vn != NULL) return vn;
1086 }
1087
1088 if (cast->statements != NULL) {
1089 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1090 if (vn != NULL) return vn;
1091 }
1092
1093 node = UP(cast->ensure_clause);
1094 } else if (cast->rescue_clause != NULL) {
1095 // https://bugs.ruby-lang.org/issues/21669
1096 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1097 if (cast->statements == NULL) return NULL;
1098
1099 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1100 if (vn == NULL) return NULL;
1101 if (void_node == NULL) void_node = vn;
1102 }
1103
1104 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1105 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1106
1107 if (vn == NULL) {
1108 // https://bugs.ruby-lang.org/issues/21669
1109 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1110 return NULL;
1111 }
1112 void_node = NULL;
1113 break;
1114 }
1115 }
1116
1117 if (cast->else_clause != NULL) {
1118 node = UP(cast->else_clause);
1119
1120 // https://bugs.ruby-lang.org/issues/21669
1121 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1122 pm_node_t *vn = pm_check_value_expression(parser, node);
1123 if (vn != NULL) return vn;
1124 }
1125 } else {
1126 return void_node;
1127 }
1128 } else {
1129 node = UP(cast->statements);
1130 }
1131
1132 break;
1133 }
1134 case PM_CASE_NODE: {
1135 // https://bugs.ruby-lang.org/issues/21669
1136 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1137 return NULL;
1138 }
1139
1140 pm_case_node_t *cast = (pm_case_node_t *) node;
1141 if (cast->else_clause == NULL) return NULL;
1142
1143 pm_node_t *condition;
1144 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1145 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1146
1147 pm_when_node_t *cast = (pm_when_node_t *) condition;
1148 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1149 if (vn == NULL) return NULL;
1150 if (void_node == NULL) void_node = vn;
1151 }
1152
1153 node = UP(cast->else_clause);
1154 break;
1155 }
1156 case PM_CASE_MATCH_NODE: {
1157 // https://bugs.ruby-lang.org/issues/21669
1158 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1159 return NULL;
1160 }
1161
1163 if (cast->else_clause == NULL) return NULL;
1164
1165 pm_node_t *condition;
1166 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1167 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1168
1169 pm_in_node_t *cast = (pm_in_node_t *) condition;
1170 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1171 if (vn == NULL) return NULL;
1172 if (void_node == NULL) void_node = vn;
1173 }
1174
1175 node = UP(cast->else_clause);
1176 break;
1177 }
1178 case PM_ENSURE_NODE: {
1179 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1180 node = UP(cast->statements);
1181 break;
1182 }
1183 case PM_PARENTHESES_NODE: {
1185 node = UP(cast->body);
1186 break;
1187 }
1188 case PM_STATEMENTS_NODE: {
1190
1191 // https://bugs.ruby-lang.org/issues/21669
1192 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1193 pm_node_t *body_part;
1194 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1195 switch (PM_NODE_TYPE(body_part)) {
1196 case PM_CASE_VOID_VALUE:
1197 if (void_node == NULL) {
1198 void_node = body_part;
1199 }
1200 return void_node;
1201 default: break;
1202 }
1203 }
1204 }
1205
1206 node = cast->body.nodes[cast->body.size - 1];
1207 break;
1208 }
1209 case PM_IF_NODE: {
1210 pm_if_node_t *cast = (pm_if_node_t *) node;
1211 if (cast->statements == NULL || cast->subsequent == NULL) {
1212 return NULL;
1213 }
1214 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1215 if (vn == NULL) {
1216 return NULL;
1217 }
1218 if (void_node == NULL) {
1219 void_node = vn;
1220 }
1221 node = cast->subsequent;
1222 break;
1223 }
1224 case PM_UNLESS_NODE: {
1225 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1226 if (cast->statements == NULL || cast->else_clause == NULL) {
1227 return NULL;
1228 }
1229 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1230 if (vn == NULL) {
1231 return NULL;
1232 }
1233 if (void_node == NULL) {
1234 void_node = vn;
1235 }
1236 node = UP(cast->else_clause);
1237 break;
1238 }
1239 case PM_ELSE_NODE: {
1240 pm_else_node_t *cast = (pm_else_node_t *) node;
1241 node = UP(cast->statements);
1242 break;
1243 }
1244 case PM_AND_NODE: {
1245 pm_and_node_t *cast = (pm_and_node_t *) node;
1246 node = cast->left;
1247 break;
1248 }
1249 case PM_OR_NODE: {
1250 pm_or_node_t *cast = (pm_or_node_t *) node;
1251 node = cast->left;
1252 break;
1253 }
1254 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1256
1257 pm_scope_t *scope = parser->current_scope;
1258 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1259
1260 pm_locals_read(&scope->locals, cast->name);
1261 return NULL;
1262 }
1263 default:
1264 return NULL;
1265 }
1266 }
1267
1268 return NULL;
1269}
1270
1271static inline void
1272pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1273 pm_node_t *void_node = pm_check_value_expression(parser, node);
1274 if (void_node != NULL) {
1275 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1276 }
1277}
1278
1282static void
1283pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1284 const char *type = NULL;
1285 int length = 0;
1286
1287 switch (PM_NODE_TYPE(node)) {
1288 case PM_BACK_REFERENCE_READ_NODE:
1289 case PM_CLASS_VARIABLE_READ_NODE:
1290 case PM_GLOBAL_VARIABLE_READ_NODE:
1291 case PM_INSTANCE_VARIABLE_READ_NODE:
1292 case PM_LOCAL_VARIABLE_READ_NODE:
1293 case PM_NUMBERED_REFERENCE_READ_NODE:
1294 type = "a variable";
1295 length = 10;
1296 break;
1297 case PM_CALL_NODE: {
1298 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1299 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1300
1301 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1302 switch (message->length) {
1303 case 1:
1304 switch (message->start[0]) {
1305 case '+':
1306 case '-':
1307 case '*':
1308 case '/':
1309 case '%':
1310 case '|':
1311 case '^':
1312 case '&':
1313 case '>':
1314 case '<':
1315 type = (const char *) message->start;
1316 length = 1;
1317 break;
1318 }
1319 break;
1320 case 2:
1321 switch (message->start[1]) {
1322 case '=':
1323 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1324 type = (const char *) message->start;
1325 length = 2;
1326 }
1327 break;
1328 case '@':
1329 if (message->start[0] == '+' || message->start[0] == '-') {
1330 type = (const char *) message->start;
1331 length = 2;
1332 }
1333 break;
1334 case '*':
1335 if (message->start[0] == '*') {
1336 type = (const char *) message->start;
1337 length = 2;
1338 }
1339 break;
1340 }
1341 break;
1342 case 3:
1343 if (memcmp(message->start, "<=>", 3) == 0) {
1344 type = "<=>";
1345 length = 3;
1346 }
1347 break;
1348 }
1349
1350 break;
1351 }
1352 case PM_CONSTANT_PATH_NODE:
1353 type = "::";
1354 length = 2;
1355 break;
1356 case PM_CONSTANT_READ_NODE:
1357 type = "a constant";
1358 length = 10;
1359 break;
1360 case PM_DEFINED_NODE:
1361 type = "defined?";
1362 length = 8;
1363 break;
1364 case PM_FALSE_NODE:
1365 type = "false";
1366 length = 5;
1367 break;
1368 case PM_FLOAT_NODE:
1369 case PM_IMAGINARY_NODE:
1370 case PM_INTEGER_NODE:
1371 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1372 case PM_INTERPOLATED_STRING_NODE:
1373 case PM_RATIONAL_NODE:
1374 case PM_REGULAR_EXPRESSION_NODE:
1375 case PM_SOURCE_ENCODING_NODE:
1376 case PM_SOURCE_FILE_NODE:
1377 case PM_SOURCE_LINE_NODE:
1378 case PM_STRING_NODE:
1379 case PM_SYMBOL_NODE:
1380 type = "a literal";
1381 length = 9;
1382 break;
1383 case PM_NIL_NODE:
1384 type = "nil";
1385 length = 3;
1386 break;
1387 case PM_RANGE_NODE: {
1388 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1389
1390 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1391 type = "...";
1392 length = 3;
1393 } else {
1394 type = "..";
1395 length = 2;
1396 }
1397
1398 break;
1399 }
1400 case PM_SELF_NODE:
1401 type = "self";
1402 length = 4;
1403 break;
1404 case PM_TRUE_NODE:
1405 type = "true";
1406 length = 4;
1407 break;
1408 default:
1409 break;
1410 }
1411
1412 if (type != NULL) {
1413 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1414 }
1415}
1416
1421static void
1422pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1423 assert(node->body.size > 0);
1424 const size_t size = node->body.size - (last_value ? 1 : 0);
1425 for (size_t index = 0; index < size; index++) {
1426 pm_void_statement_check(parser, node->body.nodes[index]);
1427 }
1428}
1429
/**
 * The context in which a node is being checked as a conditional predicate.
 * It selects the wording used when warning about literals.
 */
typedef enum {
    /** Literal warnings describe the node as being in a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** Literal warnings describe the node as being in a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1440
1444static void
1445pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1446 switch (type) {
1447 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1448 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1449 break;
1450 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1451 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1452 break;
1453 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1454 break;
1455 }
1456}
1457
1462static bool
1463pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1464 switch (PM_NODE_TYPE(node)) {
1465 case PM_ARRAY_NODE: {
1466 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1467
1468 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1469 for (size_t index = 0; index < cast->elements.size; index++) {
1470 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1471 }
1472
1473 return true;
1474 }
1475 case PM_HASH_NODE: {
1476 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1477
1478 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1479 for (size_t index = 0; index < cast->elements.size; index++) {
1480 const pm_node_t *element = cast->elements.nodes[index];
1481 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1482
1483 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1484 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1485 }
1486
1487 return true;
1488 }
1489 case PM_FALSE_NODE:
1490 case PM_FLOAT_NODE:
1491 case PM_IMAGINARY_NODE:
1492 case PM_INTEGER_NODE:
1493 case PM_NIL_NODE:
1494 case PM_RATIONAL_NODE:
1495 case PM_REGULAR_EXPRESSION_NODE:
1496 case PM_SOURCE_ENCODING_NODE:
1497 case PM_SOURCE_FILE_NODE:
1498 case PM_SOURCE_LINE_NODE:
1499 case PM_STRING_NODE:
1500 case PM_SYMBOL_NODE:
1501 case PM_TRUE_NODE:
1502 return true;
1503 default:
1504 return false;
1505 }
1506}
1507
1512static inline void
1513pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1514 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1515 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1516 }
1517}
1518
1531static void
1532pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1533 switch (PM_NODE_TYPE(node)) {
1534 case PM_AND_NODE: {
1535 pm_and_node_t *cast = (pm_and_node_t *) node;
1536 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1537 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1538 break;
1539 }
1540 case PM_OR_NODE: {
1541 pm_or_node_t *cast = (pm_or_node_t *) node;
1542 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1543 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1544 break;
1545 }
1546 case PM_PARENTHESES_NODE: {
1548
1549 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1550 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1551 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1552 }
1553
1554 break;
1555 }
1556 case PM_BEGIN_NODE: {
1557 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1558 if (cast->statements != NULL) {
1559 pm_statements_node_t *statements = cast->statements;
1560 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1561 }
1562 break;
1563 }
1564 case PM_RANGE_NODE: {
1565 pm_range_node_t *cast = (pm_range_node_t *) node;
1566
1567 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1568 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1569
1570 // Here we change the range node into a flip flop node. We can do
1571 // this since the nodes are exactly the same except for the type.
1572 // We're only asserting against the size when we should probably
1573 // assert against the entire layout, but we'll assume tests will
1574 // catch this.
1575 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1576 node->type = PM_FLIP_FLOP_NODE;
1577
1578 break;
1579 }
1580 case PM_REGULAR_EXPRESSION_NODE:
1581 // Here we change the regular expression node into a match last line
1582 // node. We can do this since the nodes are exactly the same except
1583 // for the type.
1585 node->type = PM_MATCH_LAST_LINE_NODE;
1586
1587 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1588 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1589 }
1590
1591 break;
1592 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1593 // Here we change the interpolated regular expression node into an
1594 // interpolated match last line node. We can do this since the nodes
1595 // are exactly the same except for the type.
1597 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1598
1599 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1600 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1601 }
1602
1603 break;
1604 case PM_INTEGER_NODE:
1605 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1606 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1607 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1608 }
1609 } else {
1610 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1611 }
1612 break;
1613 case PM_STRING_NODE:
1614 case PM_SOURCE_FILE_NODE:
1615 case PM_INTERPOLATED_STRING_NODE:
1616 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1617 break;
1618 case PM_SYMBOL_NODE:
1619 case PM_INTERPOLATED_SYMBOL_NODE:
1620 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1621 break;
1622 case PM_SOURCE_LINE_NODE:
1623 case PM_SOURCE_ENCODING_NODE:
1624 case PM_FLOAT_NODE:
1625 case PM_RATIONAL_NODE:
1626 case PM_IMAGINARY_NODE:
1627 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1628 break;
1629 case PM_CLASS_VARIABLE_WRITE_NODE:
1630 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1631 break;
1632 case PM_CONSTANT_WRITE_NODE:
1633 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1634 break;
1635 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1636 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1637 break;
1638 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1639 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1640 break;
1641 case PM_LOCAL_VARIABLE_WRITE_NODE:
1642 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1643 break;
1644 case PM_MULTI_WRITE_NODE:
1645 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1646 break;
1647 default:
1648 break;
1649 }
1650}
1651
1674
1678static inline const pm_location_t *
1679pm_arguments_end(pm_arguments_t *arguments) {
1680 if (arguments->block != NULL) {
1681 uint32_t end = PM_NODE_END(arguments->block);
1682
1683 if (arguments->closing_loc.length > 0) {
1684 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1685 if (arguments_end > end) {
1686 return &arguments->closing_loc;
1687 }
1688 }
1689 return &arguments->block->location;
1690 }
1691 if (arguments->closing_loc.length > 0) {
1692 return &arguments->closing_loc;
1693 }
1694 if (arguments->arguments != NULL) {
1695 return &arguments->arguments->base.location;
1696 }
1697 if (arguments->opening_loc.length > 0) {
1698 return &arguments->opening_loc;
1699 }
1700 return NULL;
1701}
1702
1707static void
1708pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1709 // First, check that we have arguments and that we don't have a closing
1710 // location for them.
1711 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1712 return;
1713 }
1714
1715 // Next, check that we don't have a single parentheses argument. This would
1716 // look like:
1717 //
1718 // foo (1) {}
1719 //
1720 // In this case, it's actually okay for the block to be attached to the
1721 // call, even though it looks like it's attached to the argument.
1722 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1723 return;
1724 }
1725
1726 // If we didn't hit a case before this check, then at this point we need to
1727 // add a syntax error.
1728 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1729}
1730
1731/******************************************************************************/
1732/* Basic character checks */
1733/******************************************************************************/
1734
1741static inline size_t
1742char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1743 if (n <= 0) return 0;
1744
1745 if (parser->encoding_changed) {
1746 size_t width;
1747
1748 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1749 return width;
1750 } else if (*b == '_') {
1751 return 1;
1752 } else if (*b >= 0x80) {
1753 return parser->encoding->char_width(b, n);
1754 } else {
1755 return 0;
1756 }
1757 } else if (*b < 0x80) {
1758 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1759 } else {
1760 return pm_encoding_utf_8_char_width(b, n);
1761 }
1762}
1763
1768static inline size_t
1769char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1770 if (n <= 0) {
1771 return 0;
1772 } else if (*b < 0x80) {
1773 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1774 } else {
1775 return pm_encoding_utf_8_char_width(b, n);
1776 }
1777}
1778
1784static inline size_t
1785char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1786 if (n <= 0) {
1787 return 0;
1788 } else if (parser->encoding_changed) {
1789 size_t width;
1790
1791 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
1792 return width;
1793 } else if (*b == '_') {
1794 return 1;
1795 } else if (*b >= 0x80) {
1796 return parser->encoding->char_width(b, n);
1797 } else {
1798 return 0;
1799 }
1800 } else {
1801 return char_is_identifier_utf8(b, n);
1802 }
1803}
1804
// Here we build a bitmap of the punctuation characters that are allowed to
// follow a $ in a global name. The bitmap covers the code points 0x20 through
// 0x7f in three words of 32 bits each, so that the check is a single lookup.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true if the given byte is one of the punctuation characters listed
 * in the bitmap above.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    // Only code points after the space and up to the tilde are in the table.
    const bool in_range = (code > 0x20) && (code <= 0x7e);
    if (!in_range) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1U) != 0;
}
1829
1830static inline bool
1831token_is_setter_name(pm_token_t *token) {
1832 return (
1833 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1834 ((token->type == PM_TOKEN_IDENTIFIER) &&
1835 (token->end - token->start >= 2) &&
1836 (token->end[-1] == '='))
1837 );
1838}
1839
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *name;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // The bytes are only compared once the lengths are known to match, so
        // the source is never read past `length` bytes.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].name, length) == 0) return true;
    }

    return false;
}
1910
1911/******************************************************************************/
1912/* Node flag handling functions */
1913/******************************************************************************/
1914
1918static inline void
1919pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1920 node->flags |= flag;
1921}
1922
1926static inline void
1927pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1928 node->flags &= (pm_node_flags_t) ~flag;
1929}
1930
1934static inline void
1935pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1936 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1937 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1938 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1939 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1940 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1941 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1942 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1943 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1944
1945 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1946}
1947
1948/******************************************************************************/
1949/* Node creation functions */
1950/******************************************************************************/
1951
1957#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1958
1962static inline pm_node_flags_t
1963pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1964 pm_node_flags_t flags = 0;
1965
1966 if (closing->type == PM_TOKEN_REGEXP_END) {
1967 pm_buffer_t unknown_flags = { 0 };
1968
1969 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1970 switch (*flag) {
1971 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1972 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1973 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1974 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1975
1976 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1977 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1978 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1979 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1980
1981 default: pm_buffer_append_byte(&unknown_flags, *flag);
1982 }
1983 }
1984
1985 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1986 if (unknown_flags_length != 0) {
1987 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1988 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1989 }
1990 pm_buffer_free(&unknown_flags);
1991 }
1992
1993 return flags;
1994}
1995
1996#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1997
1998static pm_statements_node_t *
1999pm_statements_node_create(pm_parser_t *parser);
2000
2001static void
2002pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2003
2004static size_t
2005pm_statements_node_body_length(pm_statements_node_t *node);
2006
2011static inline void *
2012pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
2013 void *memory = xcalloc(1, size);
2014 if (memory == NULL) {
2015 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
2016 abort();
2017 }
2018 return memory;
2019}
2020
/**
 * Allocate space for a node of the given type and cast the result to a
 * pointer to that type. The expansion is wrapped in parentheses so that the
 * cast applies to the whole allocation even when the macro is followed by a
 * postfix operator such as `->`.
 */
#define PM_NODE_ALLOC(parser_, type_) ((type_ *) pm_node_alloc(parser_, sizeof(type_)))

/**
 * Build the base pm_node_t of a new node from its type, flags, and location.
 * Every use increments the parser's node_id counter and assigns the new value
 * to the node.
 */
#define PM_NODE_INIT(parser_, type_, flags_, location_) (pm_node_t) { \
    .type = (type_), \
    .flags = (flags_), \
    .node_id = ++(parser_)->node_id, \
    .location = location_ \
}
2028
2032static pm_missing_node_t *
2033pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2034 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
2035
2036 *node = (pm_missing_node_t) {
2037 .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, ((pm_location_t) { .start = start, .length = length }))
2038 };
2039
2040 return node;
2041}
2042
2046static pm_alias_global_variable_node_t *
2047pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2048 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2049 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
2050
2051 *node = (pm_alias_global_variable_node_t) {
2052 .base = PM_NODE_INIT(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)),
2053 .new_name = new_name,
2054 .old_name = old_name,
2055 .keyword_loc = TOK2LOC(parser, keyword)
2056 };
2057
2058 return node;
2059}
2060
2064static pm_alias_method_node_t *
2065pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2066 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2067 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
2068
2069 *node = (pm_alias_method_node_t) {
2070 .base = PM_NODE_INIT(parser, PM_ALIAS_METHOD_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)),
2071 .new_name = new_name,
2072 .old_name = old_name,
2073 .keyword_loc = TOK2LOC(parser, keyword)
2074 };
2075
2076 return node;
2077}
2078
2082static pm_alternation_pattern_node_t *
2083pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2084 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
2085
2086 *node = (pm_alternation_pattern_node_t) {
2087 .base = PM_NODE_INIT(parser, PM_ALTERNATION_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)),
2088 .left = left,
2089 .right = right,
2090 .operator_loc = TOK2LOC(parser, operator)
2091 };
2092
2093 return node;
2094}
2095
2099static pm_and_node_t *
2100pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2101 pm_assert_value_expression(parser, left);
2102
2103 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2104
2105 *node = (pm_and_node_t) {
2106 .base = PM_NODE_INIT(parser, PM_AND_NODE, 0, PM_LOCATION_INIT_NODES(left, right)),
2107 .left = left,
2108 .operator_loc = TOK2LOC(parser, operator),
2109 .right = right
2110 };
2111
2112 return node;
2113}
2114
2118static pm_arguments_node_t *
2119pm_arguments_node_create(pm_parser_t *parser) {
2120 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2121
2122 *node = (pm_arguments_node_t) {
2123 .base = PM_NODE_INIT(parser, PM_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_UNSET),
2124 .arguments = { 0 }
2125 };
2126
2127 return node;
2128}
2129
2133static size_t
2134pm_arguments_node_size(pm_arguments_node_t *node) {
2135 return node->arguments.size;
2136}
2137
2141static void
2142pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2143 if (pm_arguments_node_size(node) == 0) {
2144 PM_NODE_START_SET_NODE(node, argument);
2145 }
2146
2147 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2148 PM_NODE_LENGTH_SET_NODE(node, argument);
2149 }
2150
2151 pm_node_list_append(&node->arguments, argument);
2152
2153 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2154 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2155 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2156 } else {
2157 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2158 }
2159 }
2160}
2161
2165static pm_array_node_t *
2166pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2167 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2168
2169 if (opening == NULL) {
2170 *node = (pm_array_node_t) {
2171 .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET),
2172 .opening_loc = { 0 },
2173 .closing_loc = { 0 },
2174 .elements = { 0 }
2175 };
2176 } else {
2177 *node = (pm_array_node_t) {
2178 .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)),
2179 .opening_loc = TOK2LOC(parser, opening),
2180 .closing_loc = TOK2LOC(parser, opening),
2181 .elements = { 0 }
2182 };
2183 }
2184
2185 return node;
2186}
2187
2191static inline void
2192pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2193 if (!node->elements.size && !node->opening_loc.length) {
2194 PM_NODE_START_SET_NODE(node, element);
2195 }
2196
2197 pm_node_list_append(&node->elements, element);
2198 PM_NODE_LENGTH_SET_NODE(node, element);
2199
2200 // If the element is not a static literal, then the array is not a static
2201 // literal. Turn that flag off.
2202 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2203 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2204 }
2205
2206 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2207 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2208 }
2209}
2210
2214static void
2215pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2216 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2217 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2218 node->closing_loc = TOK2LOC(parser, closing);
2219}
2220
2225static pm_array_pattern_node_t *
2226pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2227 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2228
2229 *node = (pm_array_pattern_node_t) {
2230 .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1])),
2231 .constant = NULL,
2232 .rest = NULL,
2233 .requireds = { 0 },
2234 .posts = { 0 },
2235 .opening_loc = { 0 },
2236 .closing_loc = { 0 }
2237 };
2238
2239 // For now we're going to just copy over each pointer manually. This could be
2240 // much more efficient, as we could instead resize the node list.
2241 bool found_rest = false;
2242 pm_node_t *child;
2243
2244 PM_NODE_LIST_FOREACH(nodes, index, child) {
2245 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2246 node->rest = child;
2247 found_rest = true;
2248 } else if (found_rest) {
2249 pm_node_list_append(&node->posts, child);
2250 } else {
2251 pm_node_list_append(&node->requireds, child);
2252 }
2253 }
2254
2255 return node;
2256}
2257
2261static pm_array_pattern_node_t *
2262pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2263 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2264
2265 *node = (pm_array_pattern_node_t) {
2266 .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODE(rest)),
2267 .constant = NULL,
2268 .rest = rest,
2269 .requireds = { 0 },
2270 .posts = { 0 },
2271 .opening_loc = { 0 },
2272 .closing_loc = { 0 }
2273 };
2274
2275 return node;
2276}
2277
2282static pm_array_pattern_node_t *
2283pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2284 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2285
2286 *node = (pm_array_pattern_node_t) {
2287 .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing)),
2288 .constant = constant,
2289 .rest = NULL,
2290 .opening_loc = TOK2LOC(parser, opening),
2291 .closing_loc = TOK2LOC(parser, closing),
2292 .requireds = { 0 },
2293 .posts = { 0 }
2294 };
2295
2296 return node;
2297}
2298
2303static pm_array_pattern_node_t *
2304pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2305 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2306
2307 *node = (pm_array_pattern_node_t) {
2308 .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
2309 .constant = NULL,
2310 .rest = NULL,
2311 .opening_loc = TOK2LOC(parser, opening),
2312 .closing_loc = TOK2LOC(parser, closing),
2313 .requireds = { 0 },
2314 .posts = { 0 }
2315 };
2316
2317 return node;
2318}
2319
2320static inline void
2321pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2322 pm_node_list_append(&node->requireds, inner);
2323}
2324
2328static pm_assoc_node_t *
2329pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2330 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2331 uint32_t end;
2332
2333 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2334 end = PM_NODE_END(value);
2335 } else if (operator != NULL) {
2336 end = PM_TOKEN_END(parser, operator);
2337 } else {
2338 end = PM_NODE_END(key);
2339 }
2340
2341 // Hash string keys will be frozen, so we can mark them as frozen here so
2342 // that the compiler picks them up and also when we check for static literal
2343 // on the keys it gets factored in.
2344 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2345 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2346 }
2347
2348 // If the key and value of this assoc node are both static literals, then
2349 // we can mark this node as a static literal.
2350 pm_node_flags_t flags = 0;
2351 if (
2352 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2353 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2354 ) {
2355 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2356 }
2357
2358 *node = (pm_assoc_node_t) {
2359 .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) })),
2360 .key = key,
2361 .operator_loc = NTOK2LOC(parser, operator),
2362 .value = value
2363 };
2364
2365 return node;
2366}
2367
2371static pm_assoc_splat_node_t *
2372pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2373 assert(operator->type == PM_TOKEN_USTAR_STAR);
2374 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2375
2376 *node = (pm_assoc_splat_node_t) {
2377 .base = PM_NODE_INIT(parser, PM_ASSOC_SPLAT_NODE, 0, (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value)),
2378 .value = value,
2379 .operator_loc = TOK2LOC(parser, operator)
2380 };
2381
2382 return node;
2383}
2384
2388static pm_back_reference_read_node_t *
2389pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2390 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2391 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2392
2393 *node = (pm_back_reference_read_node_t) {
2394 .base = PM_NODE_INIT(parser, PM_BACK_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
2395 .name = pm_parser_constant_id_token(parser, name)
2396 };
2397
2398 return node;
2399}
2400
2404static pm_begin_node_t *
2405pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2406 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2407
2408 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2409 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2410
2411 *node = (pm_begin_node_t) {
2412 .base = PM_NODE_INIT(parser, PM_BEGIN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })),
2413 .begin_keyword_loc = NTOK2LOC(parser, begin_keyword),
2414 .statements = statements,
2415 .end_keyword_loc = { 0 }
2416 };
2417
2418 return node;
2419}
2420
2424static void
2425pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2426 if (node->begin_keyword_loc.length == 0) {
2427 PM_NODE_START_SET_NODE(node, rescue_clause);
2428 }
2429 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2430 node->rescue_clause = rescue_clause;
2431}
2432
2436static void
2437pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2438 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2439 PM_NODE_START_SET_NODE(node, else_clause);
2440 }
2441 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2442 node->else_clause = else_clause;
2443}
2444
2448static void
2449pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2450 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2451 PM_NODE_START_SET_NODE(node, ensure_clause);
2452 }
2453 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2454 node->ensure_clause = ensure_clause;
2455}
2456
2460static void
2461pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2462 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2463 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2464 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2465}
2466
2470static pm_block_argument_node_t *
2471pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2472 assert(operator->type == PM_TOKEN_UAMPERSAND);
2473 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2474
2475 *node = (pm_block_argument_node_t) {
2476 .base = PM_NODE_INIT(parser, PM_BLOCK_ARGUMENT_NODE, 0, (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)),
2477 .expression = expression,
2478 .operator_loc = TOK2LOC(parser, operator)
2479 };
2480
2481 return node;
2482}
2483
2487static pm_block_node_t *
2488pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2489 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2490
2491 *node = (pm_block_node_t) {
2492 .base = PM_NODE_INIT(parser, PM_BLOCK_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
2493 .locals = *locals,
2494 .parameters = parameters,
2495 .body = body,
2496 .opening_loc = TOK2LOC(parser, opening),
2497 .closing_loc = TOK2LOC(parser, closing)
2498 };
2499
2500 return node;
2501}
2502
2506static pm_block_parameter_node_t *
2507pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2508 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2509 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2510
2511 *node = (pm_block_parameter_node_t) {
2512 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETER_NODE, 0, (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)),
2513 .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2514 .name_loc = NTOK2LOC(parser, name),
2515 .operator_loc = TOK2LOC(parser, operator)
2516 };
2517
2518 return node;
2519}
2520
2524static pm_block_parameters_node_t *
2525pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2526 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2527
2528 uint32_t start;
2529 if (opening != NULL) {
2530 start = PM_TOKEN_START(parser, opening);
2531 } else if (parameters != NULL) {
2532 start = PM_NODE_START(parameters);
2533 } else {
2534 start = 0;
2535 }
2536
2537 uint32_t end;
2538 if (parameters != NULL) {
2539 end = PM_NODE_END(parameters);
2540 } else if (opening != NULL) {
2541 end = PM_TOKEN_END(parser, opening);
2542 } else {
2543 end = 0;
2544 }
2545
2546 *node = (pm_block_parameters_node_t) {
2547 .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })),
2548 .parameters = parameters,
2549 .opening_loc = NTOK2LOC(parser, opening),
2550 .closing_loc = { 0 },
2551 .locals = { 0 }
2552 };
2553
2554 return node;
2555}
2556
2560static void
2561pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2562 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2563 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2564 node->closing_loc = TOK2LOC(parser, closing);
2565}
2566
2570static pm_block_local_variable_node_t *
2571pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2572 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2573
2574 *node = (pm_block_local_variable_node_t) {
2575 .base = PM_NODE_INIT(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
2576 .name = pm_parser_constant_id_token(parser, name)
2577 };
2578
2579 return node;
2580}
2581
2585static void
2586pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2587 pm_node_list_append(&node->locals, UP(local));
2588
2589 if (PM_NODE_LENGTH(node) == 0) {
2590 PM_NODE_START_SET_NODE(node, local);
2591 }
2592
2593 PM_NODE_LENGTH_SET_NODE(node, local);
2594}
2595
2599static pm_break_node_t *
2600pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2601 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2602 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2603
2604 *node = (pm_break_node_t) {
2605 .base = PM_NODE_INIT(parser, PM_BREAK_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)),
2606 .arguments = arguments,
2607 .keyword_loc = TOK2LOC(parser, keyword)
2608 };
2609
2610 return node;
2611}
2612
2613// There are certain flags that we want to use internally but don't want to
2614// expose because they are not relevant beyond parsing. Therefore we'll define
2615// them here and not define them in config.yml/a header file.
// NOTE(review): the write-node flag below is a fixed bit (1 << 2); presumably
// it must not collide with any public flag on the nodes it is applied to --
// confirm against the generated flag definitions.
2616static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2617
// The internal call node flags are all derived from PM_CALL_NODE_FLAGS_LAST:
// (PM_CALL_NODE_FLAGS_LAST - 1) shifted left by 1, 2, and 3 bits.
// NOTE(review): this only yields distinct single bits if
// (PM_CALL_NODE_FLAGS_LAST - 1) is itself a single bit -- verify against the
// definition of PM_CALL_NODE_FLAGS_LAST.
2618static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2619static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2620static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2621
2627static pm_call_node_t *
2628pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2629 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2630
2631 *node = (pm_call_node_t) {
2632 .base = PM_NODE_INIT(parser, PM_CALL_NODE, flags, PM_LOCATION_INIT_UNSET),
2633 .receiver = NULL,
2634 .call_operator_loc = { 0 },
2635 .message_loc = { 0 },
2636 .opening_loc = { 0 },
2637 .arguments = NULL,
2638 .closing_loc = { 0 },
2639 .equal_loc = { 0 },
2640 .block = NULL,
2641 .name = 0
2642 };
2643
2644 return node;
2645}
2646
2651static inline pm_node_flags_t
2652pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2653 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2654}
2655
2660static pm_call_node_t *
2661pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2662 pm_assert_value_expression(parser, receiver);
2663
2664 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2665 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2666 flags |= PM_CALL_NODE_FLAGS_INDEX;
2667 }
2668
2669 pm_call_node_t *node = pm_call_node_create(parser, flags);
2670
2671 PM_NODE_START_SET_NODE(node, receiver);
2672
2673 const pm_location_t *end = pm_arguments_end(arguments);
2674 assert(end != NULL && "unreachable");
2675 PM_NODE_LENGTH_SET_LOCATION(node, end);
2676
2677 node->receiver = receiver;
2678 node->message_loc.start = arguments->opening_loc.start;
2679 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2680
2681 node->opening_loc = arguments->opening_loc;
2682 node->arguments = arguments->arguments;
2683 node->closing_loc = arguments->closing_loc;
2684 node->block = arguments->block;
2685
2686 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2687 return node;
2688}
2689
2693static pm_call_node_t *
2694pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2695 pm_assert_value_expression(parser, receiver);
2696 pm_assert_value_expression(parser, argument);
2697
2698 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2699
2700 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2701 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2702
2703 node->receiver = receiver;
2704 node->message_loc = TOK2LOC(parser, operator);
2705
2706 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2707 pm_arguments_node_arguments_append(arguments, argument);
2708 node->arguments = arguments;
2709
2710 node->name = pm_parser_constant_id_token(parser, operator);
2711 return node;
2712}
2713
// Forward declaration: this helper has internal linkage and is called below
// (to compute the end of a call's message name) before its definition appears.
2714static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2715
2719static pm_call_node_t *
2720pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2721 pm_assert_value_expression(parser, receiver);
2722
2723 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2724
2725 PM_NODE_START_SET_NODE(node, receiver);
2726 const pm_location_t *end = pm_arguments_end(arguments);
2727 if (end == NULL) {
2728 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
2729 } else {
2730 PM_NODE_LENGTH_SET_LOCATION(node, end);
2731 }
2732
2733 node->receiver = receiver;
2734 node->call_operator_loc = TOK2LOC(parser, operator);
2735 node->message_loc = TOK2LOC(parser, message);
2736 node->opening_loc = arguments->opening_loc;
2737 node->arguments = arguments->arguments;
2738 node->closing_loc = arguments->closing_loc;
2739 node->block = arguments->block;
2740
2741 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2742 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2743 }
2744
2749 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
2750 return node;
2751}
2752
2756static pm_call_node_t *
2757pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2758 pm_call_node_t *node = pm_call_node_create(parser, 0);
2759 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
2760
2761 node->receiver = receiver;
2762 node->arguments = arguments;
2763
2764 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2765 return node;
2766}
2767
2772static pm_call_node_t *
2773pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2774 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2775
2776 PM_NODE_START_SET_TOKEN(parser, node, message);
2777 const pm_location_t *end = pm_arguments_end(arguments);
2778 assert(end != NULL && "unreachable");
2779 PM_NODE_LENGTH_SET_LOCATION(node, end);
2780
2781 node->message_loc = TOK2LOC(parser, message);
2782 node->opening_loc = arguments->opening_loc;
2783 node->arguments = arguments->arguments;
2784 node->closing_loc = arguments->closing_loc;
2785 node->block = arguments->block;
2786
2787 node->name = pm_parser_constant_id_token(parser, message);
2788 return node;
2789}
2790
2795static pm_call_node_t *
2796pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2797 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2798
2799 node->base.location = (pm_location_t) { 0 };
2800 node->arguments = arguments;
2801
2802 node->name = name;
2803 return node;
2804}
2805
2809static pm_call_node_t *
2810pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2811 pm_assert_value_expression(parser, receiver);
2812 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2813
2814 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2815
2816 PM_NODE_START_SET_TOKEN(parser, node, message);
2817 if (arguments->closing_loc.length > 0) {
2818 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
2819 } else {
2820 assert(receiver != NULL);
2821 PM_NODE_LENGTH_SET_NODE(node, receiver);
2822 }
2823
2824 node->receiver = receiver;
2825 node->message_loc = TOK2LOC(parser, message);
2826 node->opening_loc = arguments->opening_loc;
2827 node->arguments = arguments->arguments;
2828 node->closing_loc = arguments->closing_loc;
2829
2830 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2831 return node;
2832}
2833
2837static pm_call_node_t *
2838pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2839 pm_assert_value_expression(parser, receiver);
2840
2841 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2842
2843 PM_NODE_START_SET_NODE(node, receiver);
2844 const pm_location_t *end = pm_arguments_end(arguments);
2845 assert(end != NULL && "unreachable");
2846 PM_NODE_LENGTH_SET_LOCATION(node, end);
2847
2848 node->receiver = receiver;
2849 node->call_operator_loc = TOK2LOC(parser, operator);
2850 node->opening_loc = arguments->opening_loc;
2851 node->arguments = arguments->arguments;
2852 node->closing_loc = arguments->closing_loc;
2853 node->block = arguments->block;
2854
2855 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2856 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2857 }
2858
2859 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2860 return node;
2861}
2862
2866static pm_call_node_t *
2867pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2868 pm_assert_value_expression(parser, receiver);
2869
2870 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2871
2872 PM_NODE_START_SET_TOKEN(parser, node, operator);
2873 PM_NODE_LENGTH_SET_NODE(node, receiver);
2874
2875 node->receiver = receiver;
2876 node->message_loc = TOK2LOC(parser, operator);
2877
2878 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2879 return node;
2880}
2881
2886static pm_call_node_t *
2887pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2888 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2889
2890 node->base.location = TOK2LOC(parser, message);
2891 node->message_loc = TOK2LOC(parser, message);
2892
2893 node->name = pm_parser_constant_id_token(parser, message);
2894 return node;
2895}
2896
2901static inline bool
2902pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2903 return (
2904 (node->message_loc.length > 0) &&
2905 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
2906 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
2907 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
2908 (node->opening_loc.length == 0) &&
2909 (node->arguments == NULL) &&
2910 (node->block == NULL)
2911 );
2912}
2913
2917static void
2918pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2919 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2920
2921 if (write_constant->length > 0) {
2922 size_t length = write_constant->length - 1;
2923
2924 void *memory = xmalloc(length);
2925 memcpy(memory, write_constant->start, length);
2926
2927 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2928 } else {
2929 // We can get here if the message was missing because of a syntax error.
2930 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2931 }
2932}
2933
2937static pm_call_and_write_node_t *
2938pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2939 assert(target->block == NULL);
2940 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2941 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2942
2943 *node = (pm_call_and_write_node_t) {
2944 .base = PM_NODE_INIT(parser, PM_CALL_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
2945 .receiver = target->receiver,
2946 .call_operator_loc = target->call_operator_loc,
2947 .message_loc = target->message_loc,
2948 .read_name = 0,
2949 .write_name = target->name,
2950 .operator_loc = TOK2LOC(parser, operator),
2951 .value = value
2952 };
2953
2954 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2955
2956 // Here we're going to free the target, since it is no longer necessary.
2957 // However, we don't want to call `pm_node_destroy` because we want to keep
2958 // around all of its children since we just reused them.
2959 xfree_sized(target, sizeof(pm_call_node_t));
2960
2961 return node;
2962}
2963
2968static void
2969pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2970 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
2971 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2972 pm_node_t *node;
2973 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2974 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2975 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2976 break;
2977 }
2978 }
2979 }
2980
2981 if (block != NULL) {
2982 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2983 }
2984 }
2985}
2986
2990static pm_index_and_write_node_t *
2991pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2992 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2993 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2994
2995 pm_index_arguments_check(parser, target->arguments, target->block);
2996
2997 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2998 *node = (pm_index_and_write_node_t) {
2999 .base = PM_NODE_INIT(parser, PM_INDEX_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
3000 .receiver = target->receiver,
3001 .call_operator_loc = target->call_operator_loc,
3002 .opening_loc = target->opening_loc,
3003 .arguments = target->arguments,
3004 .closing_loc = target->closing_loc,
3005 .block = (pm_block_argument_node_t *) target->block,
3006 .operator_loc = TOK2LOC(parser, operator),
3007 .value = value
3008 };
3009
3010 // Here we're going to free the target, since it is no longer necessary.
3011 // However, we don't want to call `pm_node_destroy` because we want to keep
3012 // around all of its children since we just reused them.
3013 xfree_sized(target, sizeof(pm_call_node_t));
3014
3015 return node;
3016}
3017
3021static pm_call_operator_write_node_t *
3022pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3023 assert(target->block == NULL);
3024 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3025
3026 *node = (pm_call_operator_write_node_t) {
3027 .base = PM_NODE_INIT(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
3028 .receiver = target->receiver,
3029 .call_operator_loc = target->call_operator_loc,
3030 .message_loc = target->message_loc,
3031 .read_name = 0,
3032 .write_name = target->name,
3033 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3034 .binary_operator_loc = TOK2LOC(parser, operator),
3035 .value = value
3036 };
3037
3038 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3039
3040 // Here we're going to free the target, since it is no longer necessary.
3041 // However, we don't want to call `pm_node_destroy` because we want to keep
3042 // around all of its children since we just reused them.
3043 xfree_sized(target, sizeof(pm_call_node_t));
3044
3045 return node;
3046}
3047
3051static pm_index_operator_write_node_t *
3052pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3053 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3054
3055 pm_index_arguments_check(parser, target->arguments, target->block);
3056
3057 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3058 *node = (pm_index_operator_write_node_t) {
3059 .base = PM_NODE_INIT(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
3060 .receiver = target->receiver,
3061 .call_operator_loc = target->call_operator_loc,
3062 .opening_loc = target->opening_loc,
3063 .arguments = target->arguments,
3064 .closing_loc = target->closing_loc,
3065 .block = (pm_block_argument_node_t *) target->block,
3066 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3067 .binary_operator_loc = TOK2LOC(parser, operator),
3068 .value = value
3069 };
3070
3071 // Here we're going to free the target, since it is no longer necessary.
3072 // However, we don't want to call `pm_node_destroy` because we want to keep
3073 // around all of its children since we just reused them.
3074 xfree_sized(target, sizeof(pm_call_node_t));
3075
3076 return node;
3077}
3078
3082static pm_call_or_write_node_t *
3083pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3084 assert(target->block == NULL);
3085 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3086 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3087
3088 *node = (pm_call_or_write_node_t) {
3089 .base = PM_NODE_INIT(parser, PM_CALL_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
3090 .receiver = target->receiver,
3091 .call_operator_loc = target->call_operator_loc,
3092 .message_loc = target->message_loc,
3093 .read_name = 0,
3094 .write_name = target->name,
3095 .operator_loc = TOK2LOC(parser, operator),
3096 .value = value
3097 };
3098
3099 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3100
3101 // Here we're going to free the target, since it is no longer necessary.
3102 // However, we don't want to call `pm_node_destroy` because we want to keep
3103 // around all of its children since we just reused them.
3104 xfree_sized(target, sizeof(pm_call_node_t));
3105
3106 return node;
3107}
3108
3112static pm_index_or_write_node_t *
3113pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3114 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3115 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3116
3117 pm_index_arguments_check(parser, target->arguments, target->block);
3118
3119 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3120 *node = (pm_index_or_write_node_t) {
3121 .base = PM_NODE_INIT(parser, PM_INDEX_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)),
3122 .receiver = target->receiver,
3123 .call_operator_loc = target->call_operator_loc,
3124 .opening_loc = target->opening_loc,
3125 .arguments = target->arguments,
3126 .closing_loc = target->closing_loc,
3127 .block = (pm_block_argument_node_t *) target->block,
3128 .operator_loc = TOK2LOC(parser, operator),
3129 .value = value
3130 };
3131
3132 // Here we're going to free the target, since it is no longer necessary.
3133 // However, we don't want to call `pm_node_destroy` because we want to keep
3134 // around all of its children since we just reused them.
3135 xfree_sized(target, sizeof(pm_call_node_t));
3136
3137 return node;
3138}
3139
3144static pm_call_target_node_t *
3145pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3146 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3147
3148 *node = (pm_call_target_node_t) {
3149 .base = PM_NODE_INIT(parser, PM_CALL_TARGET_NODE, FL(target), PM_LOCATION_INIT_NODE(target)),
3150 .receiver = target->receiver,
3151 .call_operator_loc = target->call_operator_loc,
3152 .name = target->name,
3153 .message_loc = target->message_loc
3154 };
3155
3156 /* It is possible to get here where we have parsed an invalid syntax tree
3157 * where the call operator was not present. In that case we will have a
3158 * problem because it is a required location. In this case we need to fill
3159 * it in with a fake location so that the syntax tree remains valid. */
3160 if (node->call_operator_loc.length == 0) {
3161 node->call_operator_loc = target->base.location;
3162 }
3163
3164 // Here we're going to free the target, since it is no longer necessary.
3165 // However, we don't want to call `pm_node_destroy` because we want to keep
3166 // around all of its children since we just reused them.
3167 xfree_sized(target, sizeof(pm_call_node_t));
3168
3169 return node;
3170}
3171
3176static pm_index_target_node_t *
3177pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3178 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3179
3180 pm_index_arguments_check(parser, target->arguments, target->block);
3181 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3182
3183 *node = (pm_index_target_node_t) {
3184 .base = PM_NODE_INIT(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, PM_LOCATION_INIT_NODE(target)),
3185 .receiver = target->receiver,
3186 .opening_loc = target->opening_loc,
3187 .arguments = target->arguments,
3188 .closing_loc = target->closing_loc,
3189 .block = (pm_block_argument_node_t *) target->block,
3190 };
3191
3192 // Here we're going to free the target, since it is no longer necessary.
3193 // However, we don't want to call `pm_node_destroy` because we want to keep
3194 // around all of its children since we just reused them.
3195 xfree_sized(target, sizeof(pm_call_node_t));
3196
3197 return node;
3198}
3199
3203static pm_capture_pattern_node_t *
3204pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3205 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3206
3207 *node = (pm_capture_pattern_node_t) {
3208 .base = PM_NODE_INIT(parser, PM_CAPTURE_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(value, target)),
3209 .value = value,
3210 .target = target,
3211 .operator_loc = TOK2LOC(parser, operator)
3212 };
3213
3214 return node;
3215}
3216
3220static pm_case_node_t *
3221pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3222 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3223
3224 *node = (pm_case_node_t) {
3225 .base = PM_NODE_INIT(parser, PM_CASE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword)),
3226 .predicate = predicate,
3227 .else_clause = NULL,
3228 .case_keyword_loc = TOK2LOC(parser, case_keyword),
3229 .end_keyword_loc = NTOK2LOC(parser, end_keyword),
3230 .conditions = { 0 }
3231 };
3232
3233 return node;
3234}
3235
3239static void
3240pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3241 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3242
3243 pm_node_list_append(&node->conditions, condition);
3244 PM_NODE_LENGTH_SET_NODE(node, condition);
3245}
3246
3250static void
3251pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3252 node->else_clause = else_clause;
3253 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3254}
3255
3259static void
3260pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3261 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3262 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3263}
3264
3268static pm_case_match_node_t *
3269pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3270 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3271
3272 *node = (pm_case_match_node_t) {
3273 .base = PM_NODE_INIT(parser, PM_CASE_MATCH_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, case_keyword)),
3274 .predicate = predicate,
3275 .else_clause = NULL,
3276 .case_keyword_loc = TOK2LOC(parser, case_keyword),
3277 .end_keyword_loc = { 0 },
3278 .conditions = { 0 }
3279 };
3280
3281 return node;
3282}
3283
3287static void
3288pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3289 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3290
3291 pm_node_list_append(&node->conditions, condition);
3292 PM_NODE_LENGTH_SET_NODE(node, condition);
3293}
3294
3298static void
3299pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3300 node->else_clause = else_clause;
3301 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3302}
3303
3307static void
3308pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3309 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3310 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3311}
3312
3316static pm_class_node_t *
3317pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3318 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3319
3320 *node = (pm_class_node_t) {
3321 .base = PM_NODE_INIT(parser, PM_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)),
3322 .locals = *locals,
3323 .class_keyword_loc = TOK2LOC(parser, class_keyword),
3324 .constant_path = constant_path,
3325 .inheritance_operator_loc = NTOK2LOC(parser, inheritance_operator),
3326 .superclass = superclass,
3327 .body = body,
3328 .end_keyword_loc = TOK2LOC(parser, end_keyword),
3329 .name = pm_parser_constant_id_token(parser, name)
3330 };
3331
3332 return node;
3333}
3334
3338static pm_class_variable_and_write_node_t *
3339pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3340 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3341 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3342
3343 *node = (pm_class_variable_and_write_node_t) {
3344 .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3345 .name = target->name,
3346 .name_loc = target->base.location,
3347 .operator_loc = TOK2LOC(parser, operator),
3348 .value = value
3349 };
3350
3351 return node;
3352}
3353
3357static pm_class_variable_operator_write_node_t *
3358pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3359 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3360
3361 *node = (pm_class_variable_operator_write_node_t) {
3362 .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3363 .name = target->name,
3364 .name_loc = target->base.location,
3365 .binary_operator_loc = TOK2LOC(parser, operator),
3366 .value = value,
3367 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3368 };
3369
3370 return node;
3371}
3372
3376static pm_class_variable_or_write_node_t *
3377pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3378 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3379 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3380
3381 *node = (pm_class_variable_or_write_node_t) {
3382 .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3383 .name = target->name,
3384 .name_loc = target->base.location,
3385 .operator_loc = TOK2LOC(parser, operator),
3386 .value = value
3387 };
3388
3389 return node;
3390}
3391
3395static pm_class_variable_read_node_t *
3396pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3397 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3398 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3399
3400 *node = (pm_class_variable_read_node_t) {
3401 .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)),
3402 .name = pm_parser_constant_id_token(parser, token)
3403 };
3404
3405 return node;
3406}
3407
3414static inline pm_node_flags_t
3415pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3416 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3417 return flags;
3418 }
3419 return 0;
3420}
3421
3425static pm_class_variable_write_node_t *
3426pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3427 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3428 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3429
3430 *node = (pm_class_variable_write_node_t) {
3431 .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)),
3432 .name = read_node->name,
3433 .name_loc = read_node->base.location,
3434 .operator_loc = TOK2LOC(parser, operator),
3435 .value = value
3436 };
3437
3438 return node;
3439}
3440
3444static pm_constant_path_and_write_node_t *
3445pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3446 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3447 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3448
3449 *node = (pm_constant_path_and_write_node_t) {
3450 .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3451 .target = target,
3452 .operator_loc = TOK2LOC(parser, operator),
3453 .value = value
3454 };
3455
3456 return node;
3457}
3458
3462static pm_constant_path_operator_write_node_t *
3463pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3464 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3465
3466 *node = (pm_constant_path_operator_write_node_t) {
3467 .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3468 .target = target,
3469 .binary_operator_loc = TOK2LOC(parser, operator),
3470 .value = value,
3471 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3472 };
3473
3474 return node;
3475}
3476
3480static pm_constant_path_or_write_node_t *
3481pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3482 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3483 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3484
3485 *node = (pm_constant_path_or_write_node_t) {
3486 .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3487 .target = target,
3488 .operator_loc = TOK2LOC(parser, operator),
3489 .value = value
3490 };
3491
3492 return node;
3493}
3494
3498static pm_constant_path_node_t *
3499pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3500 pm_assert_value_expression(parser, parent);
3501 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3502
3503 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3504 if (name_token->type == PM_TOKEN_CONSTANT) {
3505 name = pm_parser_constant_id_token(parser, name_token);
3506 }
3507
3508 *node = (pm_constant_path_node_t) {
3509 .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_NODE, 0, (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token)),
3510 .parent = parent,
3511 .name = name,
3512 .delimiter_loc = TOK2LOC(parser, delimiter),
3513 .name_loc = TOK2LOC(parser, name_token)
3514 };
3515
3516 return node;
3517}
3518
3522static pm_constant_path_write_node_t *
3523pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3524 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3525 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3526
3527 *node = (pm_constant_path_write_node_t) {
3528 .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)),
3529 .target = target,
3530 .operator_loc = TOK2LOC(parser, operator),
3531 .value = value
3532 };
3533
3534 return node;
3535}
3536
3540static pm_constant_and_write_node_t *
3541pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3542 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3543 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3544
3545 *node = (pm_constant_and_write_node_t) {
3546 .base = PM_NODE_INIT(parser, PM_CONSTANT_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3547 .name = target->name,
3548 .name_loc = target->base.location,
3549 .operator_loc = TOK2LOC(parser, operator),
3550 .value = value
3551 };
3552
3553 return node;
3554}
3555
3559static pm_constant_operator_write_node_t *
3560pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3561 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3562
3563 *node = (pm_constant_operator_write_node_t) {
3564 .base = PM_NODE_INIT(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3565 .name = target->name,
3566 .name_loc = target->base.location,
3567 .binary_operator_loc = TOK2LOC(parser, operator),
3568 .value = value,
3569 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3570 };
3571
3572 return node;
3573}
3574
3578static pm_constant_or_write_node_t *
3579pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3580 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3581 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3582
3583 *node = (pm_constant_or_write_node_t) {
3584 .base = PM_NODE_INIT(parser, PM_CONSTANT_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
3585 .name = target->name,
3586 .name_loc = target->base.location,
3587 .operator_loc = TOK2LOC(parser, operator),
3588 .value = value
3589 };
3590
3591 return node;
3592}
3593
3597static pm_constant_read_node_t *
3598pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3599 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3600 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3601
3602 *node = (pm_constant_read_node_t) {
3603 .base = PM_NODE_INIT(parser, PM_CONSTANT_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
3604 .name = pm_parser_constant_id_token(parser, name)
3605 };
3606
3607 return node;
3608}
3609
3613static pm_constant_write_node_t *
3614pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3615 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3616 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
3617
3618 *node = (pm_constant_write_node_t) {
3619 .base = PM_NODE_INIT(parser, PM_CONSTANT_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)),
3620 .name = target->name,
3621 .name_loc = target->base.location,
3622 .operator_loc = TOK2LOC(parser, operator),
3623 .value = value
3624 };
3625
3626 return node;
3627}
3628
3632static void
3633pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3634 switch (PM_NODE_TYPE(node)) {
3635 case PM_BEGIN_NODE: {
3636 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3637 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3638 break;
3639 }
3640 case PM_PARENTHESES_NODE: {
3641 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3642 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3643 break;
3644 }
3645 case PM_STATEMENTS_NODE: {
3646 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3647 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3648 break;
3649 }
3650 case PM_ARRAY_NODE:
3651 case PM_FLOAT_NODE:
3652 case PM_IMAGINARY_NODE:
3653 case PM_INTEGER_NODE:
3654 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3655 case PM_INTERPOLATED_STRING_NODE:
3656 case PM_INTERPOLATED_SYMBOL_NODE:
3657 case PM_INTERPOLATED_X_STRING_NODE:
3658 case PM_RATIONAL_NODE:
3659 case PM_REGULAR_EXPRESSION_NODE:
3660 case PM_SOURCE_ENCODING_NODE:
3661 case PM_SOURCE_FILE_NODE:
3662 case PM_SOURCE_LINE_NODE:
3663 case PM_STRING_NODE:
3664 case PM_SYMBOL_NODE:
3665 case PM_X_STRING_NODE:
3666 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3667 break;
3668 default:
3669 break;
3670 }
3671}
3672
3676static pm_def_node_t *
3677pm_def_node_create(
3678 pm_parser_t *parser,
3679 pm_constant_id_t name,
3680 const pm_token_t *name_loc,
3681 pm_node_t *receiver,
3682 pm_parameters_node_t *parameters,
3683 pm_node_t *body,
3684 pm_constant_id_list_t *locals,
3685 const pm_token_t *def_keyword,
3686 const pm_token_t *operator,
3687 const pm_token_t *lparen,
3688 const pm_token_t *rparen,
3689 const pm_token_t *equal,
3690 const pm_token_t *end_keyword
3691) {
3692 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3693
3694 if (receiver != NULL) {
3695 pm_def_node_receiver_check(parser, receiver);
3696 }
3697
3698 *node = (pm_def_node_t) {
3699 .base = PM_NODE_INIT(parser, PM_DEF_NODE, 0, (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword)),
3700 .name = name,
3701 .name_loc = TOK2LOC(parser, name_loc),
3702 .receiver = receiver,
3703 .parameters = parameters,
3704 .body = body,
3705 .locals = *locals,
3706 .def_keyword_loc = TOK2LOC(parser, def_keyword),
3707 .operator_loc = NTOK2LOC(parser, operator),
3708 .lparen_loc = NTOK2LOC(parser, lparen),
3709 .rparen_loc = NTOK2LOC(parser, rparen),
3710 .equal_loc = NTOK2LOC(parser, equal),
3711 .end_keyword_loc = NTOK2LOC(parser, end_keyword)
3712 };
3713
3714 return node;
3715}
3716
3720static pm_defined_node_t *
3721pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3722 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3723
3724 *node = (pm_defined_node_t) {
3725 .base = PM_NODE_INIT(parser, PM_DEFINED_NODE, 0, (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen)),
3726 .lparen_loc = NTOK2LOC(parser, lparen),
3727 .value = value,
3728 .rparen_loc = NTOK2LOC(parser, rparen),
3729 .keyword_loc = TOK2LOC(parser, keyword)
3730 };
3731
3732 return node;
3733}
3734
3738static pm_else_node_t *
3739pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3740 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3741
3742 *node = (pm_else_node_t) {
3743 .base = PM_NODE_INIT(parser, PM_ELSE_NODE, 0, ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword)),
3744 .else_keyword_loc = TOK2LOC(parser, else_keyword),
3745 .statements = statements,
3746 .end_keyword_loc = NTOK2LOC(parser, end_keyword)
3747 };
3748
3749 return node;
3750}
3751
3755static pm_embedded_statements_node_t *
3756pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3757 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3758
3759 *node = (pm_embedded_statements_node_t) {
3760 .base = PM_NODE_INIT(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
3761 .opening_loc = TOK2LOC(parser, opening),
3762 .statements = statements,
3763 .closing_loc = TOK2LOC(parser, closing)
3764 };
3765
3766 return node;
3767}
3768
3772static pm_embedded_variable_node_t *
3773pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3774 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3775
3776 *node = (pm_embedded_variable_node_t) {
3777 .base = PM_NODE_INIT(parser, PM_EMBEDDED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)),
3778 .operator_loc = TOK2LOC(parser, operator),
3779 .variable = variable
3780 };
3781
3782 return node;
3783}
3784
3788static pm_ensure_node_t *
3789pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3790 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
3791
3792 *node = (pm_ensure_node_t) {
3793 .base = PM_NODE_INIT(parser, PM_ENSURE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword)),
3794 .ensure_keyword_loc = TOK2LOC(parser, ensure_keyword),
3795 .statements = statements,
3796 .end_keyword_loc = TOK2LOC(parser, end_keyword)
3797 };
3798
3799 return node;
3800}
3801
3805static pm_false_node_t *
3806pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3807 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3808 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
3809
3810 *node = (pm_false_node_t) {
3811 .base = PM_NODE_INIT(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token))
3812 };
3813
3814 return node;
3815}
3816
3821static pm_find_pattern_node_t *
3822pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3823 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
3824
3825 pm_node_t *left = nodes->nodes[0];
3826 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3827 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
3828
3829 pm_node_t *right;
3830
3831 if (nodes->size == 1) {
3832 right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0));
3833 } else {
3834 right = nodes->nodes[nodes->size - 1];
3835 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3836 }
3837
3838#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3839 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3840 // The resulting AST will anyway be ignored, but this file still needs to compile.
3841 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3842#else
3843 pm_node_t *right_splat_node = right;
3844#endif
3845 *node = (pm_find_pattern_node_t) {
3846 .base = PM_NODE_INIT(parser, PM_FIND_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)),
3847 .constant = NULL,
3848 .left = left_splat_node,
3849 .right = right_splat_node,
3850 .requireds = { 0 },
3851 .opening_loc = { 0 },
3852 .closing_loc = { 0 }
3853 };
3854
3855 // For now we're going to just copy over each pointer manually. This could be
3856 // much more efficient, as we could instead resize the node list to only point
3857 // to 1...-1.
3858 for (size_t index = 1; index < nodes->size - 1; index++) {
3859 pm_node_list_append(&node->requireds, nodes->nodes[index]);
3860 }
3861
3862 return node;
3863}
3864
3869static double
3870pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3871 ptrdiff_t diff = token->end - token->start;
3872 if (diff <= 0) return 0.0;
3873
3874 // First, get a buffer of the content.
3875 size_t length = (size_t) diff;
3876 const size_t buffer_size = sizeof(char) * (length + 1);
3877 char *buffer = xmalloc(buffer_size);
3878 memcpy((void *) buffer, token->start, length);
3879
3880 // Next, determine if we need to replace the decimal point because of
3881 // locale-specific options, and then normalize them if we have to.
3882 char decimal_point = *localeconv()->decimal_point;
3883 if (decimal_point != '.') {
3884 for (size_t index = 0; index < length; index++) {
3885 if (buffer[index] == '.') buffer[index] = decimal_point;
3886 }
3887 }
3888
3889 // Next, handle underscores by removing them from the buffer.
3890 for (size_t index = 0; index < length; index++) {
3891 if (buffer[index] == '_') {
3892 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3893 length--;
3894 }
3895 }
3896
3897 // Null-terminate the buffer so that strtod cannot read off the end.
3898 buffer[length] = '\0';
3899
3900 // Now, call strtod to parse the value. Note that CRuby has their own
3901 // version of strtod which avoids locales. We're okay using the locale-aware
3902 // version because we've already validated through the parser that the token
3903 // is in a valid format.
3904 errno = 0;
3905 char *eptr;
3906 double value = strtod(buffer, &eptr);
3907
3908 // This should never happen, because we've already checked that the token
3909 // is in a valid format. However it's good to be safe.
3910 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3911 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
3912 xfree_sized(buffer, buffer_size);
3913 return 0.0;
3914 }
3915
3916 // If errno is set, then it should only be ERANGE. At this point we need to
3917 // check if it's infinity (it should be).
3918 if (errno == ERANGE && PRISM_ISINF(value)) {
3919 int warn_width;
3920 const char *ellipsis;
3921
3922 if (length > 20) {
3923 warn_width = 20;
3924 ellipsis = "...";
3925 } else {
3926 warn_width = (int) length;
3927 ellipsis = "";
3928 }
3929
3930 pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
3931 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3932 }
3933
3934 // Finally we can free the buffer and return the value.
3935 xfree_sized(buffer, buffer_size);
3936 return value;
3937}
3938
3942static pm_float_node_t *
3943pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3944 assert(token->type == PM_TOKEN_FLOAT);
3945 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
3946
3947 *node = (pm_float_node_t) {
3948 .base = PM_NODE_INIT(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
3949 .value = pm_double_parse(parser, token)
3950 };
3951
3952 return node;
3953}
3954
3958static pm_imaginary_node_t *
3959pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3960 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3961
3962 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
3963 *node = (pm_imaginary_node_t) {
3964 .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
3965 .numeric = UP(pm_float_node_create(parser, &((pm_token_t) {
3966 .type = PM_TOKEN_FLOAT,
3967 .start = token->start,
3968 .end = token->end - 1
3969 })))
3970 };
3971
3972 return node;
3973}
3974
3978static pm_rational_node_t *
3979pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3980 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3981
3982 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
3983 *node = (pm_rational_node_t) {
3984 .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
3985 .numerator = { 0 },
3986 .denominator = { 0 }
3987 };
3988
3989 const uint8_t *start = token->start;
3990 const uint8_t *end = token->end - 1; // r
3991
3992 while (start < end && *start == '0') start++; // 0.1 -> .1
3993 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3994
3995 size_t length = (size_t) (end - start);
3996 if (length == 1) {
3997 node->denominator.value = 1;
3998 return node;
3999 }
4000
4001 const uint8_t *point = memchr(start, '.', length);
4002 assert(point && "should have a decimal point");
4003
4004 uint8_t *digits = xmalloc(length);
4005 if (digits == NULL) {
4006 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4007 abort();
4008 }
4009
4010 memcpy(digits, start, (unsigned long) (point - start));
4011 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4012 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4013
4014 size_t fract_length = 0;
4015 for (const uint8_t *fract = point; fract < end; ++fract) {
4016 if (*fract != '_') ++fract_length;
4017 }
4018 digits[0] = '1';
4019 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
4020 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
4021 xfree_sized(digits, length);
4022
4023 pm_integers_reduce(&node->numerator, &node->denominator);
4024 return node;
4025}
4026
4031static pm_imaginary_node_t *
4032pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4033 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4034
4035 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4036 *node = (pm_imaginary_node_t) {
4037 .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
4038 .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4039 .type = PM_TOKEN_FLOAT_RATIONAL,
4040 .start = token->start,
4041 .end = token->end - 1
4042 })))
4043 };
4044
4045 return node;
4046}
4047
4051static pm_for_node_t *
4052pm_for_node_create(
4053 pm_parser_t *parser,
4054 pm_node_t *index,
4055 pm_node_t *collection,
4056 pm_statements_node_t *statements,
4057 const pm_token_t *for_keyword,
4058 const pm_token_t *in_keyword,
4059 const pm_token_t *do_keyword,
4060 const pm_token_t *end_keyword
4061) {
4062 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4063
4064 *node = (pm_for_node_t) {
4065 .base = PM_NODE_INIT(parser, PM_FOR_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword)),
4066 .index = index,
4067 .collection = collection,
4068 .statements = statements,
4069 .for_keyword_loc = TOK2LOC(parser, for_keyword),
4070 .in_keyword_loc = TOK2LOC(parser, in_keyword),
4071 .do_keyword_loc = NTOK2LOC(parser, do_keyword),
4072 .end_keyword_loc = TOK2LOC(parser, end_keyword)
4073 };
4074
4075 return node;
4076}
4077
4081static pm_forwarding_arguments_node_t *
4082pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4083 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4084 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4085
4086 *node = (pm_forwarding_arguments_node_t) {
4087 .base = PM_NODE_INIT(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
4088 };
4089
4090 return node;
4091}
4092
4096static pm_forwarding_parameter_node_t *
4097pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4098 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4099 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4100
4101 *node = (pm_forwarding_parameter_node_t) {
4102 .base = PM_NODE_INIT(parser, PM_FORWARDING_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
4103 };
4104
4105 return node;
4106}
4107
4111static pm_forwarding_super_node_t *
4112pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4113 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4114 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4115 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4116
4117 pm_block_node_t *block = NULL;
4118 if (arguments->block != NULL) {
4119 block = (pm_block_node_t *) arguments->block;
4120 }
4121
4122 *node = (pm_forwarding_super_node_t) {
4123 .base = PM_NODE_INIT(parser, PM_FORWARDING_SUPER_NODE, 0, (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block)),
4124 .block = block
4125 };
4126
4127 return node;
4128}
4129
4134static pm_hash_pattern_node_t *
4135pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4136 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4137
4138 *node = (pm_hash_pattern_node_t) {
4139 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
4140 .constant = NULL,
4141 .opening_loc = TOK2LOC(parser, opening),
4142 .closing_loc = TOK2LOC(parser, closing),
4143 .elements = { 0 },
4144 .rest = NULL
4145 };
4146
4147 return node;
4148}
4149
4153static pm_hash_pattern_node_t *
4154pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4155 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4156
4157 uint32_t start;
4158 uint32_t end;
4159
4160 if (elements->size > 0) {
4161 if (rest) {
4162 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4163 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4164 } else {
4165 start = PM_NODE_START(elements->nodes[0]);
4166 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4167 }
4168 } else {
4169 assert(rest != NULL);
4170 start = PM_NODE_START(rest);
4171 end = PM_NODE_END(rest);
4172 }
4173
4174 *node = (pm_hash_pattern_node_t) {
4175 .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })),
4176 .constant = NULL,
4177 .elements = { 0 },
4178 .rest = rest,
4179 .opening_loc = { 0 },
4180 .closing_loc = { 0 }
4181 };
4182
4183 pm_node_list_concat(&node->elements, elements);
4184 return node;
4185}
4186
4190static pm_constant_id_t
4191pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4192 switch (PM_NODE_TYPE(target)) {
4193 case PM_GLOBAL_VARIABLE_READ_NODE:
4194 return ((pm_global_variable_read_node_t *) target)->name;
4195 case PM_BACK_REFERENCE_READ_NODE:
4196 return ((pm_back_reference_read_node_t *) target)->name;
4197 case PM_NUMBERED_REFERENCE_READ_NODE:
4198 // This will only ever happen in the event of a syntax error, but we
4199 // still need to provide something for the node.
4200 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4201 default:
4202 assert(false && "unreachable");
4203 return (pm_constant_id_t) -1;
4204 }
4205}
4206
4210static pm_global_variable_and_write_node_t *
4211pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4212 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4213 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4214
4215 *node = (pm_global_variable_and_write_node_t) {
4216 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4217 .name = pm_global_variable_write_name(parser, target),
4218 .name_loc = target->location,
4219 .operator_loc = TOK2LOC(parser, operator),
4220 .value = value
4221 };
4222
4223 return node;
4224}
4225
4229static pm_global_variable_operator_write_node_t *
4230pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4231 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4232
4233 *node = (pm_global_variable_operator_write_node_t) {
4234 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4235 .name = pm_global_variable_write_name(parser, target),
4236 .name_loc = target->location,
4237 .binary_operator_loc = TOK2LOC(parser, operator),
4238 .value = value,
4239 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4240 };
4241
4242 return node;
4243}
4244
4248static pm_global_variable_or_write_node_t *
4249pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4250 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4251 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4252
4253 *node = (pm_global_variable_or_write_node_t) {
4254 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4255 .name = pm_global_variable_write_name(parser, target),
4256 .name_loc = target->location,
4257 .operator_loc = TOK2LOC(parser, operator),
4258 .value = value
4259 };
4260
4261 return node;
4262}
4263
4267static pm_global_variable_read_node_t *
4268pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4269 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4270
4271 *node = (pm_global_variable_read_node_t) {
4272 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
4273 .name = pm_parser_constant_id_token(parser, name)
4274 };
4275
4276 return node;
4277}
4278
4282static pm_global_variable_read_node_t *
4283pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4284 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4285
4286 *node = (pm_global_variable_read_node_t) {
4287 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_UNSET),
4288 .name = name
4289 };
4290
4291 return node;
4292}
4293
4297static pm_global_variable_write_node_t *
4298pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4299 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4300 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4301
4302 *node = (pm_global_variable_write_node_t) {
4303 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)),
4304 .name = pm_global_variable_write_name(parser, target),
4305 .name_loc = target->location,
4306 .operator_loc = TOK2LOC(parser, operator),
4307 .value = value
4308 };
4309
4310 return node;
4311}
4312
4316static pm_global_variable_write_node_t *
4317pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4318 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4319
4320 *node = (pm_global_variable_write_node_t) {
4321 .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0, PM_LOCATION_INIT_UNSET),
4322 .name = name,
4323 .name_loc = { 0 },
4324 .operator_loc = { 0 },
4325 .value = value
4326 };
4327
4328 return node;
4329}
4330
4334static pm_hash_node_t *
4335pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4336 assert(opening != NULL);
4337 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4338
4339 *node = (pm_hash_node_t) {
4340 .base = PM_NODE_INIT(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)),
4341 .opening_loc = TOK2LOC(parser, opening),
4342 .closing_loc = { 0 },
4343 .elements = { 0 }
4344 };
4345
4346 return node;
4347}
4348
4352static inline void
4353pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4354 pm_node_list_append(&hash->elements, element);
4355
4356 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4357 if (static_literal) {
4358 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4359 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4360 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4361 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4362 }
4363
4364 if (!static_literal) {
4365 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4366 }
4367}
4368
4369static inline void
4370pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4371 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4372 hash->closing_loc = TOK2LOC(parser, token);
4373}
4374
4378static pm_if_node_t *
4379pm_if_node_create(pm_parser_t *parser,
4380 const pm_token_t *if_keyword,
4381 pm_node_t *predicate,
4382 const pm_token_t *then_keyword,
4383 pm_statements_node_t *statements,
4384 pm_node_t *subsequent,
4385 const pm_token_t *end_keyword
4386) {
4387 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4388 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4389
4390 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4391 uint32_t end;
4392
4393 if (end_keyword != NULL) {
4394 end = PM_TOKEN_END(parser, end_keyword);
4395 } else if (subsequent != NULL) {
4396 end = PM_NODE_END(subsequent);
4397 } else if (pm_statements_node_body_length(statements) != 0) {
4398 end = PM_NODE_END(statements);
4399 } else {
4400 end = PM_NODE_END(predicate);
4401 }
4402
4403 *node = (pm_if_node_t) {
4404 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, ((pm_location_t) { .start = start, .length = U32(end - start) })),
4405 .if_keyword_loc = TOK2LOC(parser, if_keyword),
4406 .predicate = predicate,
4407 .then_keyword_loc = NTOK2LOC(parser, then_keyword),
4408 .statements = statements,
4409 .subsequent = subsequent,
4410 .end_keyword_loc = NTOK2LOC(parser, end_keyword)
4411 };
4412
4413 return node;
4414}
4415
4419static pm_if_node_t *
4420pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4421 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4422 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4423
4424 pm_statements_node_t *statements = pm_statements_node_create(parser);
4425 pm_statements_node_body_append(parser, statements, statement, true);
4426
4427 *node = (pm_if_node_t) {
4428 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)),
4429 .if_keyword_loc = TOK2LOC(parser, if_keyword),
4430 .predicate = predicate,
4431 .then_keyword_loc = { 0 },
4432 .statements = statements,
4433 .subsequent = NULL,
4434 .end_keyword_loc = { 0 }
4435 };
4436
4437 return node;
4438}
4439
4443static pm_if_node_t *
4444pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4445 pm_assert_value_expression(parser, predicate);
4446 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4447
4448 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4449 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4450
4451 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4452 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4453
4454 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4455 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4456
4457 *node = (pm_if_node_t) {
4458 .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(predicate, false_expression)),
4459 .if_keyword_loc = { 0 },
4460 .predicate = predicate,
4461 .then_keyword_loc = TOK2LOC(parser, qmark),
4462 .statements = if_statements,
4463 .subsequent = UP(else_node),
4464 .end_keyword_loc = { 0 }
4465 };
4466
4467 return node;
4468
4469}
4470
4471static inline void
4472pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4473 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4474 node->end_keyword_loc = TOK2LOC(parser, keyword);
4475}
4476
4477static inline void
4478pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4479 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4480 node->end_keyword_loc = TOK2LOC(parser, keyword);
4481}
4482
4486static pm_implicit_node_t *
4487pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4488 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4489
4490 *node = (pm_implicit_node_t) {
4491 .base = PM_NODE_INIT(parser, PM_IMPLICIT_NODE, 0, PM_LOCATION_INIT_NODE(value)),
4492 .value = value
4493 };
4494
4495 return node;
4496}
4497
4501static pm_implicit_rest_node_t *
4502pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4503 assert(token->type == PM_TOKEN_COMMA);
4504
4505 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4506
4507 *node = (pm_implicit_rest_node_t) {
4508 .base = PM_NODE_INIT(parser, PM_IMPLICIT_REST_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
4509 };
4510
4511 return node;
4512}
4513
4517static pm_integer_node_t *
4518pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4519 assert(token->type == PM_TOKEN_INTEGER);
4520 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4521
4522 *node = (pm_integer_node_t) {
4523 .base = PM_NODE_INIT(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
4524 .value = { 0 }
4525 };
4526
4527 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4528 switch (base) {
4529 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4530 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4531 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4532 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4533 default: assert(false && "unreachable"); break;
4534 }
4535
4536 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4537 return node;
4538}
4539
4544static pm_imaginary_node_t *
4545pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4546 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4547
4548 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4549 *node = (pm_imaginary_node_t) {
4550 .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
4551 .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4552 .type = PM_TOKEN_INTEGER,
4553 .start = token->start,
4554 .end = token->end - 1
4555 })))
4556 };
4557
4558 return node;
4559}
4560
4565static pm_rational_node_t *
4566pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4567 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4568
4569 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4570 *node = (pm_rational_node_t) {
4571 .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
4572 .numerator = { 0 },
4573 .denominator = { .value = 1, 0 }
4574 };
4575
4576 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4577 switch (base) {
4578 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4579 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4580 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4581 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4582 default: assert(false && "unreachable"); break;
4583 }
4584
4585 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4586
4587 return node;
4588}
4589
4594static pm_imaginary_node_t *
4595pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4596 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4597
4598 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4599 *node = (pm_imaginary_node_t) {
4600 .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)),
4601 .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4602 .type = PM_TOKEN_INTEGER_RATIONAL,
4603 .start = token->start,
4604 .end = token->end - 1
4605 })))
4606 };
4607
4608 return node;
4609}
4610
4614static pm_in_node_t *
4615pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4616 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4617
4618 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4619 uint32_t end;
4620
4621 if (statements != NULL) {
4622 end = PM_NODE_END(statements);
4623 } else if (then_keyword != NULL) {
4624 end = PM_TOKEN_END(parser, then_keyword);
4625 } else {
4626 end = PM_NODE_END(pattern);
4627 }
4628
4629 *node = (pm_in_node_t) {
4630 .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })),
4631 .pattern = pattern,
4632 .statements = statements,
4633 .in_loc = TOK2LOC(parser, in_keyword),
4634 .then_loc = NTOK2LOC(parser, then_keyword)
4635 };
4636
4637 return node;
4638}
4639
4643static pm_instance_variable_and_write_node_t *
4644pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4645 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4646 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
4647
4648 *node = (pm_instance_variable_and_write_node_t) {
4649 .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4650 .name = target->name,
4651 .name_loc = target->base.location,
4652 .operator_loc = TOK2LOC(parser, operator),
4653 .value = value
4654 };
4655
4656 return node;
4657}
4658
4662static pm_instance_variable_operator_write_node_t *
4663pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4664 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
4665
4666 *node = (pm_instance_variable_operator_write_node_t) {
4667 .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4668 .name = target->name,
4669 .name_loc = target->base.location,
4670 .binary_operator_loc = TOK2LOC(parser, operator),
4671 .value = value,
4672 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4673 };
4674
4675 return node;
4676}
4677
4681static pm_instance_variable_or_write_node_t *
4682pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4683 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4684 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
4685
4686 *node = (pm_instance_variable_or_write_node_t) {
4687 .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
4688 .name = target->name,
4689 .name_loc = target->base.location,
4690 .operator_loc = TOK2LOC(parser, operator),
4691 .value = value
4692 };
4693
4694 return node;
4695}
4696
4700static pm_instance_variable_read_node_t *
4701pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4702 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4703 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
4704
4705 *node = (pm_instance_variable_read_node_t) {
4706 .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)),
4707 .name = pm_parser_constant_id_token(parser, token)
4708 };
4709
4710 return node;
4711}
4712
4717static pm_instance_variable_write_node_t *
4718pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4719 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
4720 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
4721
4722 *node = (pm_instance_variable_write_node_t) {
4723 .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)),
4724 .name = read_node->name,
4725 .name_loc = read_node->base.location,
4726 .operator_loc = TOK2LOC(parser, operator),
4727 .value = value
4728 };
4729
4730 return node;
4731}
4732
4738static void
4739pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4740 switch (PM_NODE_TYPE(part)) {
4741 case PM_STRING_NODE:
4742 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4743 break;
4744 case PM_EMBEDDED_STATEMENTS_NODE: {
4745 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4746 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4747
4748 if (embedded == NULL) {
4749 // If there are no statements or more than one statement, then
4750 // we lose the static literal flag.
4751 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4752 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4753 // If the embedded statement is a string, then we can keep the
4754 // static literal flag and mark the string as frozen.
4755 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4756 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4757 // If the embedded statement is an interpolated string and it's
4758 // a static literal, then we can keep the static literal flag.
4759 } else {
4760 // Otherwise we lose the static literal flag.
4761 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4762 }
4763
4764 break;
4765 }
4766 case PM_EMBEDDED_VARIABLE_NODE:
4767 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4768 break;
4769 default:
4770 assert(false && "unexpected node type");
4771 break;
4772 }
4773
4774 pm_node_list_append(parts, part);
4775}
4776
4780static pm_interpolated_regular_expression_node_t *
4781pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4782 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
4783
4784 *node = (pm_interpolated_regular_expression_node_t) {
4785 .base = PM_NODE_INIT(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)),
4786 .opening_loc = TOK2LOC(parser, opening),
4787 .closing_loc = TOK2LOC(parser, opening),
4788 .parts = { 0 }
4789 };
4790
4791 return node;
4792}
4793
4794static inline void
4795pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4796 if (PM_NODE_START(node) > PM_NODE_START(part)) {
4797 PM_NODE_START_SET_NODE(node, part);
4798 }
4799 if (PM_NODE_END(node) < PM_NODE_END(part)) {
4800 PM_NODE_LENGTH_SET_NODE(node, part);
4801 }
4802
4803 pm_interpolated_node_append(UP(node), &node->parts, part);
4804}
4805
4806static inline void
4807pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4808 node->closing_loc = TOK2LOC(parser, closing);
4809 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4810 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
4811}
4812
4836static inline void
4837pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
4838#define CLEAR_FLAGS(node) \
4839 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
4840
4841#define MUTABLE_FLAGS(node) \
4842 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
4843
4844 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4845 PM_NODE_START_SET_NODE(node, part);
4846 }
4847
4848 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4849 PM_NODE_LENGTH_SET_NODE(node, part);
4850 }
4851
4852 switch (PM_NODE_TYPE(part)) {
4853 case PM_STRING_NODE:
4854 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4855 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4856 // as long as this interpolation only consists of other string literals.
4857 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4858 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4859 }
4860 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4861 break;
4862 case PM_INTERPOLATED_STRING_NODE:
4863 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4864 // If the string that we're concatenating is a static literal,
4865 // then we can keep the static literal flag for this string.
4866 } else {
4867 // Otherwise, we lose the static literal flag here and we should
4868 // also clear the mutability flags.
4869 CLEAR_FLAGS(node);
4870 }
4871 break;
4872 case PM_EMBEDDED_STATEMENTS_NODE: {
4873 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
4874 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4875
4876 if (embedded == NULL) {
4877 // If we're embedding multiple statements or no statements, then
4878 // the string is not longer a static literal.
4879 CLEAR_FLAGS(node);
4880 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4881 // If the embedded statement is a string, then we can make that
4882 // string as frozen and static literal, and not touch the static
4883 // literal status of this string.
4884 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4885
4886 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4887 MUTABLE_FLAGS(node);
4888 }
4889 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4890 // If the embedded statement is an interpolated string, but that
4891 // string is marked as static literal, then we can keep our
4892 // static literal status for this string.
4893 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4894 MUTABLE_FLAGS(node);
4895 }
4896 } else {
4897 // In all other cases, we lose the static literal flag here and
4898 // become mutable.
4899 CLEAR_FLAGS(node);
4900 }
4901
4902 break;
4903 }
4904 case PM_EMBEDDED_VARIABLE_NODE:
4905 // Embedded variables clear static literal, which means we also
4906 // should clear the mutability flags.
4907 CLEAR_FLAGS(node);
4908 break;
4909 case PM_X_STRING_NODE:
4910 case PM_INTERPOLATED_X_STRING_NODE:
4911 case PM_SYMBOL_NODE:
4912 case PM_INTERPOLATED_SYMBOL_NODE:
4913 // These will only happen in error cases. But we want to handle it
4914 // here so that we don't fail the assertion.
4915 CLEAR_FLAGS(node);
4916 break;
4917 default:
4918 assert(false && "unexpected node type");
4919 break;
4920 }
4921
4922 pm_node_list_append(&node->parts, part);
4923
4924#undef CLEAR_FLAGS
4925#undef MUTABLE_FLAGS
4926}
4927
4931static pm_interpolated_string_node_t *
4932pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4933 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
4934 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
4935
4936 switch (parser->frozen_string_literal) {
4937 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4938 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4939 break;
4940 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4941 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
4942 break;
4943 }
4944
4945 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4946 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4947
4948 *node = (pm_interpolated_string_node_t) {
4949 .base = PM_NODE_INIT(parser, PM_INTERPOLATED_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })),
4950 .opening_loc = NTOK2LOC(parser, opening),
4951 .closing_loc = NTOK2LOC(parser, closing),
4952 .parts = { 0 }
4953 };
4954
4955 if (parts != NULL) {
4956 pm_node_t *part;
4957 PM_NODE_LIST_FOREACH(parts, index, part) {
4958 pm_interpolated_string_node_append(node, part);
4959 }
4960 }
4961
4962 return node;
4963}
4964
4968static void
4969pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4970 node->closing_loc = TOK2LOC(parser, closing);
4971 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4972}
4973
4974static void
4975pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4976 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4977 PM_NODE_START_SET_NODE(node, part);
4978 }
4979
4980 pm_interpolated_node_append(UP(node), &node->parts, part);
4981
4982 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4983 PM_NODE_LENGTH_SET_NODE(node, part);
4984 }
4985}
4986
4987static void
4988pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4989 node->closing_loc = TOK2LOC(parser, closing);
4990 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4991}
4992
4996static pm_interpolated_symbol_node_t *
4997pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4998 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
4999
5000 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5001 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5002
5003 *node = (pm_interpolated_symbol_node_t) {
5004 .base = PM_NODE_INIT(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, ((pm_location_t) { .start = start, .length = U32(end - start) })),
5005 .opening_loc = NTOK2LOC(parser, opening),
5006 .closing_loc = NTOK2LOC(parser, closing),
5007 .parts = { 0 }
5008 };
5009
5010 if (parts != NULL) {
5011 pm_node_t *part;
5012 PM_NODE_LIST_FOREACH(parts, index, part) {
5013 pm_interpolated_symbol_node_append(node, part);
5014 }
5015 }
5016
5017 return node;
5018}
5019
5023static pm_interpolated_x_string_node_t *
5024pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5025 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5026
5027 *node = (pm_interpolated_x_string_node_t) {
5028 .base = PM_NODE_INIT(parser, PM_INTERPOLATED_X_STRING_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
5029 .opening_loc = TOK2LOC(parser, opening),
5030 .closing_loc = TOK2LOC(parser, closing),
5031 .parts = { 0 }
5032 };
5033
5034 return node;
5035}
5036
5037static inline void
5038pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5039 pm_interpolated_node_append(UP(node), &node->parts, part);
5040 PM_NODE_LENGTH_SET_NODE(node, part);
5041}
5042
5043static inline void
5044pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5045 node->closing_loc = TOK2LOC(parser, closing);
5046 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5047}
5048
5052static pm_it_local_variable_read_node_t *
5053pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5054 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5055
5056 *node = (pm_it_local_variable_read_node_t) {
5057 .base = PM_NODE_INIT(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
5058 };
5059
5060 return node;
5061}
5062
5066static pm_it_parameters_node_t *
5067pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5068 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5069
5070 *node = (pm_it_parameters_node_t) {
5071 .base = PM_NODE_INIT(parser, PM_IT_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
5072 };
5073
5074 return node;
5075}
5076
5080static pm_keyword_hash_node_t *
5081pm_keyword_hash_node_create(pm_parser_t *parser) {
5082 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5083
5084 *node = (pm_keyword_hash_node_t) {
5085 .base = PM_NODE_INIT(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS, PM_LOCATION_INIT_UNSET),
5086 .elements = { 0 }
5087 };
5088
5089 return node;
5090}
5091
5095static void
5096pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5097 // If the element being added is not an AssocNode or does not have a symbol
5098 // key, then we want to turn the SYMBOL_KEYS flag off.
5099 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5100 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5101 }
5102
5103 pm_node_list_append(&hash->elements, element);
5104 if (PM_NODE_LENGTH(hash) == 0) {
5105 PM_NODE_START_SET_NODE(hash, element);
5106 }
5107 PM_NODE_LENGTH_SET_NODE(hash, element);
5108}
5109
5113static pm_required_keyword_parameter_node_t *
5114pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5115 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5116
5117 *node = (pm_required_keyword_parameter_node_t) {
5118 .base = PM_NODE_INIT(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
5119 .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5120 .name_loc = TOK2LOC(parser, name),
5121 };
5122
5123 return node;
5124}
5125
5129static pm_optional_keyword_parameter_node_t *
5130pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5131 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5132
5133 *node = (pm_optional_keyword_parameter_node_t) {
5134 .base = PM_NODE_INIT(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)),
5135 .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5136 .name_loc = TOK2LOC(parser, name),
5137 .value = value
5138 };
5139
5140 return node;
5141}
5142
5146static pm_keyword_rest_parameter_node_t *
5147pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5148 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5149
5150 *node = (pm_keyword_rest_parameter_node_t) {
5151 .base = PM_NODE_INIT(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)),
5152 .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5153 .name_loc = NTOK2LOC(parser, name),
5154 .operator_loc = TOK2LOC(parser, operator)
5155 };
5156
5157 return node;
5158}
5159
5163static pm_lambda_node_t *
5164pm_lambda_node_create(
5165 pm_parser_t *parser,
5166 pm_constant_id_list_t *locals,
5167 const pm_token_t *operator,
5168 const pm_token_t *opening,
5169 const pm_token_t *closing,
5170 pm_node_t *parameters,
5171 pm_node_t *body
5172) {
5173 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5174
5175 *node = (pm_lambda_node_t) {
5176 .base = PM_NODE_INIT(parser, PM_LAMBDA_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, closing)),
5177 .locals = *locals,
5178 .operator_loc = TOK2LOC(parser, operator),
5179 .opening_loc = TOK2LOC(parser, opening),
5180 .closing_loc = TOK2LOC(parser, closing),
5181 .parameters = parameters,
5182 .body = body
5183 };
5184
5185 return node;
5186}
5187
5191static pm_local_variable_and_write_node_t *
5192pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5193 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5194 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5195 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5196
5197 *node = (pm_local_variable_and_write_node_t) {
5198 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
5199 .name_loc = target->location,
5200 .operator_loc = TOK2LOC(parser, operator),
5201 .value = value,
5202 .name = name,
5203 .depth = depth
5204 };
5205
5206 return node;
5207}
5208
5212static pm_local_variable_operator_write_node_t *
5213pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5214 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5215
5216 *node = (pm_local_variable_operator_write_node_t) {
5217 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
5218 .name_loc = target->location,
5219 .binary_operator_loc = TOK2LOC(parser, operator),
5220 .value = value,
5221 .name = name,
5222 .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5223 .depth = depth
5224 };
5225
5226 return node;
5227}
5228
5232static pm_local_variable_or_write_node_t *
5233pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5234 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5235 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5236 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5237
5238 *node = (pm_local_variable_or_write_node_t) {
5239 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)),
5240 .name_loc = target->location,
5241 .operator_loc = TOK2LOC(parser, operator),
5242 .value = value,
5243 .name = name,
5244 .depth = depth
5245 };
5246
5247 return node;
5248}
5249
5253static pm_local_variable_read_node_t *
5254pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5255 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5256
5257 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5258
5259 *node = (pm_local_variable_read_node_t) {
5260 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
5261 .name = name_id,
5262 .depth = depth
5263 };
5264
5265 return node;
5266}
5267
5271static pm_local_variable_read_node_t *
5272pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5273 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5274 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5275}
5276
5281static pm_local_variable_read_node_t *
5282pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5283 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5284 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5285}
5286
5290static pm_local_variable_write_node_t *
5291pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5292 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5293 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5294
5295 *node = (pm_local_variable_write_node_t) {
5296 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start })),
5297 .name = name,
5298 .depth = depth,
5299 .value = value,
5300 .name_loc = *name_loc,
5301 .operator_loc = TOK2LOC(parser, operator)
5302 };
5303
5304 return node;
5305}
5306
/**
 * Returns true if the bytes in the range [start, end) are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that no byte outside the range is read.
    if ((end - start) != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
5314
5319static inline bool
5320pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5321 return (
5322 (length == 2) &&
5323 (parser->start[start] == '_') &&
5324 (parser->start[start + 1] != '0') &&
5325 pm_char_is_decimal_digit(parser->start[start + 1])
5326 );
5327}
5328
5333static inline void
5334pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5335 if (pm_token_is_numbered_parameter(parser, start, length)) {
5336 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5337 }
5338}
5339
5344static pm_local_variable_target_node_t *
5345pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5346 pm_refute_numbered_parameter(parser, location->start, location->length);
5347 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5348
5349 *node = (pm_local_variable_target_node_t) {
5350 .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, ((pm_location_t) { .start = location->start, .length = location->length })),
5351 .name = name,
5352 .depth = depth
5353 };
5354
5355 return node;
5356}
5357
5361static pm_match_predicate_node_t *
5362pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5363 pm_assert_value_expression(parser, value);
5364
5365 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5366
5367 *node = (pm_match_predicate_node_t) {
5368 .base = PM_NODE_INIT(parser, PM_MATCH_PREDICATE_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)),
5369 .value = value,
5370 .pattern = pattern,
5371 .operator_loc = TOK2LOC(parser, operator)
5372 };
5373
5374 return node;
5375}
5376
5380static pm_match_required_node_t *
5381pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5382 pm_assert_value_expression(parser, value);
5383
5384 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5385
5386 *node = (pm_match_required_node_t) {
5387 .base = PM_NODE_INIT(parser, PM_MATCH_REQUIRED_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)),
5388 .value = value,
5389 .pattern = pattern,
5390 .operator_loc = TOK2LOC(parser, operator)
5391 };
5392
5393 return node;
5394}
5395
5399static pm_match_write_node_t *
5400pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5401 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5402
5403 *node = (pm_match_write_node_t) {
5404 .base = PM_NODE_INIT(parser, PM_MATCH_WRITE_NODE, 0, PM_LOCATION_INIT_NODE(call)),
5405 .call = call,
5406 .targets = { 0 }
5407 };
5408
5409 return node;
5410}
5411
5415static pm_module_node_t *
5416pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5417 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5418
5419 *node = (pm_module_node_t) {
5420 .base = PM_NODE_INIT(parser, PM_MODULE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword)),
5421 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5422 .module_keyword_loc = TOK2LOC(parser, module_keyword),
5423 .constant_path = constant_path,
5424 .body = body,
5425 .end_keyword_loc = TOK2LOC(parser, end_keyword),
5426 .name = pm_parser_constant_id_token(parser, name)
5427 };
5428
5429 return node;
5430}
5431
5435static pm_multi_target_node_t *
5436pm_multi_target_node_create(pm_parser_t *parser) {
5437 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5438
5439 *node = (pm_multi_target_node_t) {
5440 .base = PM_NODE_INIT(parser, PM_MULTI_TARGET_NODE, 0, PM_LOCATION_INIT_UNSET),
5441 .lefts = { 0 },
5442 .rest = NULL,
5443 .rights = { 0 },
5444 .lparen_loc = { 0 },
5445 .rparen_loc = { 0 }
5446 };
5447
5448 return node;
5449}
5450
5454static void
5455pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5456 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5457 if (node->rest == NULL) {
5458 node->rest = target;
5459 } else {
5460 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5461 pm_node_list_append(&node->rights, target);
5462 }
5463 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5464 if (node->rest == NULL) {
5465 node->rest = target;
5466 } else {
5467 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5468 pm_node_list_append(&node->rights, target);
5469 }
5470 } else if (node->rest == NULL) {
5471 pm_node_list_append(&node->lefts, target);
5472 } else {
5473 pm_node_list_append(&node->rights, target);
5474 }
5475
5476 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5477 PM_NODE_START_SET_NODE(node, target);
5478 }
5479
5480 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5481 PM_NODE_LENGTH_SET_NODE(node, target);
5482 }
5483}
5484
5488static void
5489pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5490 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5491 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5492 node->lparen_loc = TOK2LOC(parser, lparen);
5493}
5494
5498static void
5499pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5500 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5501 node->rparen_loc = TOK2LOC(parser, rparen);
5502}
5503
5507static pm_multi_write_node_t *
5508pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5509 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
5510 pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
5511
5512 *node = (pm_multi_write_node_t) {
5513 .base = PM_NODE_INIT(parser, PM_MULTI_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)),
5514 .lefts = target->lefts,
5515 .rest = target->rest,
5516 .rights = target->rights,
5517 .lparen_loc = target->lparen_loc,
5518 .rparen_loc = target->rparen_loc,
5519 .operator_loc = TOK2LOC(parser, operator),
5520 .value = value
5521 };
5522
5523 // Explicitly do not call pm_node_destroy here because we want to keep
5524 // around all of the information within the MultiWriteNode node.
5525 xfree_sized(target, sizeof(pm_multi_target_node_t));
5526
5527 return node;
5528}
5529
5533static pm_next_node_t *
5534pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5535 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5536 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
5537
5538 *node = (pm_next_node_t) {
5539 .base = PM_NODE_INIT(parser, PM_NEXT_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)),
5540 .keyword_loc = TOK2LOC(parser, keyword),
5541 .arguments = arguments
5542 };
5543
5544 return node;
5545}
5546
5550static pm_nil_node_t *
5551pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5552 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5553 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
5554
5555 *node = (pm_nil_node_t) {
5556 .base = PM_NODE_INIT(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token))
5557 };
5558
5559 return node;
5560}
5561
5565static pm_no_block_parameter_node_t *
5566pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5567 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5568 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5569 pm_no_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_block_parameter_node_t);
5570
5571 *node = (pm_no_block_parameter_node_t) {
5572 .base = PM_NODE_INIT(parser, PM_NO_BLOCK_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, keyword)),
5573 .operator_loc = TOK2LOC(parser, operator),
5574 .keyword_loc = TOK2LOC(parser, keyword)
5575 };
5576
5577 return node;
5578}
5579
5583static pm_no_keywords_parameter_node_t *
5584pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5585 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5586 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5587 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
5588
5589 *node = (pm_no_keywords_parameter_node_t) {
5590 .base = PM_NODE_INIT(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, keyword)),
5591 .operator_loc = TOK2LOC(parser, operator),
5592 .keyword_loc = TOK2LOC(parser, keyword)
5593 };
5594
5595 return node;
5596}
5597
5601static pm_numbered_parameters_node_t *
5602pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5603 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
5604
5605 *node = (pm_numbered_parameters_node_t) {
5606 .base = PM_NODE_INIT(parser, PM_NUMBERED_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
5607 .maximum = maximum
5608 };
5609
5610 return node;
5611}
5612
5617#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5618
5625static uint32_t
5626pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5627 const uint8_t *start = token->start + 1;
5628 const uint8_t *end = token->end;
5629
5630 ptrdiff_t diff = end - start;
5631 assert(diff > 0);
5632#if PTRDIFF_MAX > SIZE_MAX
5633 assert(diff < (ptrdiff_t) SIZE_MAX);
5634#endif
5635 size_t length = (size_t) diff;
5636
5637 char *digits = xcalloc(length + 1, sizeof(char));
5638 memcpy(digits, start, length);
5639 digits[length] = '\0';
5640
5641 char *endptr;
5642 errno = 0;
5643 unsigned long value = strtoul(digits, &endptr, 10);
5644
5645 if ((digits == endptr) || (*endptr != '\0')) {
5646 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5647 value = 0;
5648 }
5649
5650 xfree_sized(digits, sizeof(char) * (length + 1));
5651
5652 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5653 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5654 value = 0;
5655 }
5656
5657 return (uint32_t) value;
5658}
5659
5660#undef NTH_REF_MAX
5661
5665static pm_numbered_reference_read_node_t *
5666pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5667 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5668 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
5669
5670 *node = (pm_numbered_reference_read_node_t) {
5671 .base = PM_NODE_INIT(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)),
5672 .number = pm_numbered_reference_read_node_number(parser, name)
5673 };
5674
5675 return node;
5676}
5677
5681static pm_optional_parameter_node_t *
5682pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5683 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
5684
5685 *node = (pm_optional_parameter_node_t) {
5686 .base = PM_NODE_INIT(parser, PM_OPTIONAL_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)),
5687 .name = pm_parser_constant_id_token(parser, name),
5688 .name_loc = TOK2LOC(parser, name),
5689 .operator_loc = TOK2LOC(parser, operator),
5690 .value = value
5691 };
5692
5693 return node;
5694}
5695
5699static pm_or_node_t *
5700pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5701 pm_assert_value_expression(parser, left);
5702
5703 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
5704
5705 *node = (pm_or_node_t) {
5706 .base = PM_NODE_INIT(parser, PM_OR_NODE, 0, PM_LOCATION_INIT_NODES(left, right)),
5707 .left = left,
5708 .right = right,
5709 .operator_loc = TOK2LOC(parser, operator)
5710 };
5711
5712 return node;
5713}
5714
5718static pm_parameters_node_t *
5719pm_parameters_node_create(pm_parser_t *parser) {
5720 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
5721
5722 *node = (pm_parameters_node_t) {
5723 .base = PM_NODE_INIT(parser, PM_PARAMETERS_NODE, 0, PM_LOCATION_INIT_UNSET),
5724 .rest = NULL,
5725 .keyword_rest = NULL,
5726 .block = NULL,
5727 .requireds = { 0 },
5728 .optionals = { 0 },
5729 .posts = { 0 },
5730 .keywords = { 0 }
5731 };
5732
5733 return node;
5734}
5735
5739static void
5740pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5741 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5742 PM_NODE_START_SET_NODE(params, param);
5743 }
5744
5745 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5746 PM_NODE_LENGTH_SET_NODE(params, param);
5747 }
5748}
5749
5753static void
5754pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
5755 pm_parameters_node_location_set(params, param);
5756 pm_node_list_append(&params->requireds, param);
5757}
5758
5762static void
5763pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5764 pm_parameters_node_location_set(params, UP(param));
5765 pm_node_list_append(&params->optionals, UP(param));
5766}
5767
5771static void
5772pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
5773 pm_parameters_node_location_set(params, param);
5774 pm_node_list_append(&params->posts, param);
5775}
5776
5780static void
5781pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5782 pm_parameters_node_location_set(params, param);
5783 params->rest = param;
5784}
5785
5789static void
5790pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
5791 pm_parameters_node_location_set(params, param);
5792 pm_node_list_append(&params->keywords, param);
5793}
5794
5798static void
5799pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5800 assert(params->keyword_rest == NULL);
5801 pm_parameters_node_location_set(params, param);
5802 params->keyword_rest = param;
5803}
5804
5808static void
5809pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
5810 assert(params->block == NULL);
5811 pm_parameters_node_location_set(params, param);
5812 params->block = param;
5813}
5814
5818static pm_program_node_t *
5819pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5820 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
5821
5822 *node = (pm_program_node_t) {
5823 .base = PM_NODE_INIT(parser, PM_PROGRAM_NODE, 0, PM_LOCATION_INIT_NODE(statements)),
5824 .locals = *locals,
5825 .statements = statements
5826 };
5827
5828 return node;
5829}
5830
5834static pm_parentheses_node_t *
5835pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5836 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
5837
5838 *node = (pm_parentheses_node_t) {
5839 .base = PM_NODE_INIT(parser, PM_PARENTHESES_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
5840 .body = body,
5841 .opening_loc = TOK2LOC(parser, opening),
5842 .closing_loc = TOK2LOC(parser, closing)
5843 };
5844
5845 return node;
5846}
5847
5851static pm_pinned_expression_node_t *
5852pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5853 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
5854
5855 *node = (pm_pinned_expression_node_t) {
5856 .base = PM_NODE_INIT(parser, PM_PINNED_EXPRESSION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, rparen)),
5857 .expression = expression,
5858 .operator_loc = TOK2LOC(parser, operator),
5859 .lparen_loc = TOK2LOC(parser, lparen),
5860 .rparen_loc = TOK2LOC(parser, rparen)
5861 };
5862
5863 return node;
5864}
5865
5869static pm_pinned_variable_node_t *
5870pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5871 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
5872
5873 *node = (pm_pinned_variable_node_t) {
5874 .base = PM_NODE_INIT(parser, PM_PINNED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)),
5875 .variable = variable,
5876 .operator_loc = TOK2LOC(parser, operator)
5877 };
5878
5879 return node;
5880}
5881
5885static pm_post_execution_node_t *
5886pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5887 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
5888
5889 *node = (pm_post_execution_node_t) {
5890 .base = PM_NODE_INIT(parser, PM_POST_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)),
5891 .statements = statements,
5892 .keyword_loc = TOK2LOC(parser, keyword),
5893 .opening_loc = TOK2LOC(parser, opening),
5894 .closing_loc = TOK2LOC(parser, closing)
5895 };
5896
5897 return node;
5898}
5899
5903static pm_pre_execution_node_t *
5904pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5905 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
5906
5907 *node = (pm_pre_execution_node_t) {
5908 .base = PM_NODE_INIT(parser, PM_PRE_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)),
5909 .statements = statements,
5910 .keyword_loc = TOK2LOC(parser, keyword),
5911 .opening_loc = TOK2LOC(parser, opening),
5912 .closing_loc = TOK2LOC(parser, closing)
5913 };
5914
5915 return node;
5916}
5917
5921static pm_range_node_t *
5922pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5923 pm_assert_value_expression(parser, left);
5924 pm_assert_value_expression(parser, right);
5925
5926 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
5927 pm_node_flags_t flags = 0;
5928
5929 // Indicate that this node is an exclusive range if the operator is `...`.
5930 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5931 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5932 }
5933
5934 // Indicate that this node is a static literal (i.e., can be compiled with
5935 // a putobject in CRuby) if the left and right are implicit nil, explicit
5936 // nil, or integers.
5937 if (
5938 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5939 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5940 ) {
5941 flags |= PM_NODE_FLAG_STATIC_LITERAL;
5942 }
5943
5944 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
5945 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
5946
5947 *node = (pm_range_node_t) {
5948 .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })),
5949 .left = left,
5950 .right = right,
5951 .operator_loc = TOK2LOC(parser, operator)
5952 };
5953
5954 return node;
5955}
5956
5960static pm_redo_node_t *
5961pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5962 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5963 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
5964
5965 *node = (pm_redo_node_t) {
5966 .base = PM_NODE_INIT(parser, PM_REDO_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
5967 };
5968
5969 return node;
5970}
5971
5976static pm_regular_expression_node_t *
5977pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5978 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
5979 pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
5980
5981 *node = (pm_regular_expression_node_t) {
5982 .base = PM_NODE_INIT(parser, PM_REGULAR_EXPRESSION_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
5983 .opening_loc = TOK2LOC(parser, opening),
5984 .content_loc = TOK2LOC(parser, content),
5985 .closing_loc = TOK2LOC(parser, closing),
5986 .unescaped = *unescaped
5987 };
5988
5989 return node;
5990}
5991
5995static inline pm_regular_expression_node_t *
5996pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5997 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5998}
5999
6003static pm_required_parameter_node_t *
6004pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6005 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6006
6007 *node = (pm_required_parameter_node_t) {
6008 .base = PM_NODE_INIT(parser, PM_REQUIRED_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)),
6009 .name = pm_parser_constant_id_token(parser, token)
6010 };
6011
6012 return node;
6013}
6014
6018static pm_rescue_modifier_node_t *
6019pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6020 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6021
6022 *node = (pm_rescue_modifier_node_t) {
6023 .base = PM_NODE_INIT(parser, PM_RESCUE_MODIFIER_NODE, 0, PM_LOCATION_INIT_NODES(expression, rescue_expression)),
6024 .expression = expression,
6025 .keyword_loc = TOK2LOC(parser, keyword),
6026 .rescue_expression = rescue_expression
6027 };
6028
6029 return node;
6030}
6031
6035static pm_rescue_node_t *
6036pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6037 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6038
6039 *node = (pm_rescue_node_t) {
6040 .base = PM_NODE_INIT(parser, PM_RESCUE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)),
6041 .keyword_loc = TOK2LOC(parser, keyword),
6042 .operator_loc = { 0 },
6043 .then_keyword_loc = { 0 },
6044 .reference = NULL,
6045 .statements = NULL,
6046 .subsequent = NULL,
6047 .exceptions = { 0 }
6048 };
6049
6050 return node;
6051}
6052
6053static inline void
6054pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
6055 node->operator_loc = TOK2LOC(parser, operator);
6056}
6057
6061static void
6062pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6063 node->reference = reference;
6064 PM_NODE_LENGTH_SET_NODE(node, reference);
6065}
6066
6070static void
6071pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6072 node->statements = statements;
6073 if (pm_statements_node_body_length(statements) > 0) {
6074 PM_NODE_LENGTH_SET_NODE(node, statements);
6075 }
6076}
6077
6081static void
6082pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6083 node->subsequent = subsequent;
6084 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6085}
6086
6090static void
6091pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6092 pm_node_list_append(&node->exceptions, exception);
6093 PM_NODE_LENGTH_SET_NODE(node, exception);
6094}
6095
6099static pm_rest_parameter_node_t *
6100pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6101 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6102
6103 *node = (pm_rest_parameter_node_t) {
6104 .base = PM_NODE_INIT(parser, PM_REST_PARAMETER_NODE, 0, (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)),
6105 .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6106 .name_loc = NTOK2LOC(parser, name),
6107 .operator_loc = TOK2LOC(parser, operator)
6108 };
6109
6110 return node;
6111}
6112
6116static pm_retry_node_t *
6117pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6118 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6119 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6120
6121 *node = (pm_retry_node_t) {
6122 .base = PM_NODE_INIT(parser, PM_RETRY_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
6123 };
6124
6125 return node;
6126}
6127
6131static pm_return_node_t *
6132pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6133 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6134
6135 *node = (pm_return_node_t) {
6136 .base = PM_NODE_INIT(parser, PM_RETURN_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)),
6137 .keyword_loc = TOK2LOC(parser, keyword),
6138 .arguments = arguments
6139 };
6140
6141 return node;
6142}
6143
6147static pm_self_node_t *
6148pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6149 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6150 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6151
6152 *node = (pm_self_node_t) {
6153 .base = PM_NODE_INIT(parser, PM_SELF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token))
6154 };
6155
6156 return node;
6157}
6158
6162static pm_shareable_constant_node_t *
6163pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6164 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6165
6166 *node = (pm_shareable_constant_node_t) {
6167 .base = PM_NODE_INIT(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, PM_LOCATION_INIT_NODE(write)),
6168 .write = write
6169 };
6170
6171 return node;
6172}
6173
6177static pm_singleton_class_node_t *
6178pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6179 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6180
6181 *node = (pm_singleton_class_node_t) {
6182 .base = PM_NODE_INIT(parser, PM_SINGLETON_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)),
6183 .locals = *locals,
6184 .class_keyword_loc = TOK2LOC(parser, class_keyword),
6185 .operator_loc = TOK2LOC(parser, operator),
6186 .expression = expression,
6187 .body = body,
6188 .end_keyword_loc = TOK2LOC(parser, end_keyword)
6189 };
6190
6191 return node;
6192}
6193
6197static pm_source_encoding_node_t *
6198pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6199 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6200 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6201
6202 *node = (pm_source_encoding_node_t) {
6203 .base = PM_NODE_INIT(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token))
6204 };
6205
6206 return node;
6207}
6208
6212static pm_source_file_node_t*
6213pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6214 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6215 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6216
6217 pm_node_flags_t flags = 0;
6218
6219 switch (parser->frozen_string_literal) {
6220 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6221 flags |= PM_STRING_FLAGS_MUTABLE;
6222 break;
6223 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6224 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6225 break;
6226 }
6227
6228 *node = (pm_source_file_node_t) {
6229 .base = PM_NODE_INIT(parser, PM_SOURCE_FILE_NODE, flags, PM_LOCATION_INIT_TOKEN(parser, file_keyword)),
6230 .filepath = parser->filepath
6231 };
6232
6233 return node;
6234}
6235
6239static pm_source_line_node_t *
6240pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6241 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6242 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6243
6244 *node = (pm_source_line_node_t) {
6245 .base = PM_NODE_INIT(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token))
6246 };
6247
6248 return node;
6249}
6250
6254static pm_splat_node_t *
6255pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6256 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6257
6258 *node = (pm_splat_node_t) {
6259 .base = PM_NODE_INIT(parser, PM_SPLAT_NODE, 0, (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)),
6260 .operator_loc = TOK2LOC(parser, operator),
6261 .expression = expression
6262 };
6263
6264 return node;
6265}
6266
6270static pm_statements_node_t *
6271pm_statements_node_create(pm_parser_t *parser) {
6272 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6273
6274 *node = (pm_statements_node_t) {
6275 .base = PM_NODE_INIT(parser, PM_STATEMENTS_NODE, 0, PM_LOCATION_INIT_UNSET),
6276 .body = { 0 }
6277 };
6278
6279 return node;
6280}
6281
6285static size_t
6286pm_statements_node_body_length(pm_statements_node_t *node) {
6287 return node && node->body.size;
6288}
6289
6294static inline void
6295pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6296 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6297 PM_NODE_START_SET_NODE(node, statement);
6298 }
6299
6300 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6301 PM_NODE_LENGTH_SET_NODE(node, statement);
6302 }
6303}
6304
6308static void
6309pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6310 pm_statements_node_body_update(node, statement);
6311
6312 if (node->body.size > 0) {
6313 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6314
6315 switch (PM_NODE_TYPE(previous)) {
6316 case PM_BREAK_NODE:
6317 case PM_NEXT_NODE:
6318 case PM_REDO_NODE:
6319 case PM_RETRY_NODE:
6320 case PM_RETURN_NODE:
6321 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6322 break;
6323 default:
6324 break;
6325 }
6326 }
6327
6328 pm_node_list_append(&node->body, statement);
6329 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6330}
6331
6335static void
6336pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
6337 pm_statements_node_body_update(node, statement);
6338 pm_node_list_prepend(&node->body, statement);
6339 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6340}
6341
6345static inline pm_string_node_t *
6346pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6347 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
6348 pm_node_flags_t flags = 0;
6349
6350 switch (parser->frozen_string_literal) {
6351 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6352 flags = PM_STRING_FLAGS_MUTABLE;
6353 break;
6354 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6355 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6356 break;
6357 }
6358
6359 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6360 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6361
6362 *node = (pm_string_node_t) {
6363 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })),
6364 .opening_loc = NTOK2LOC(parser, opening),
6365 .content_loc = TOK2LOC(parser, content),
6366 .closing_loc = NTOK2LOC(parser, closing),
6367 .unescaped = *string
6368 };
6369
6370 return node;
6371}
6372
6376static pm_string_node_t *
6377pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6378 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6379}
6380
6385static pm_string_node_t *
6386pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6387 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6388 parser->current_string = PM_STRING_EMPTY;
6389 return node;
6390}
6391
6395static pm_super_node_t *
6396pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6397 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6398 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
6399
6400 const pm_location_t *end = pm_arguments_end(arguments);
6401 assert(end != NULL && "unreachable");
6402
6403 *node = (pm_super_node_t) {
6404 .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) })),
6405 .keyword_loc = TOK2LOC(parser, keyword),
6406 .lparen_loc = arguments->opening_loc,
6407 .arguments = arguments->arguments,
6408 .rparen_loc = arguments->closing_loc,
6409 .block = arguments->block
6410 };
6411
6412 return node;
6413}
6414
6419static bool
6420pm_ascii_only_p(const pm_string_t *contents) {
6421 const size_t length = pm_string_length(contents);
6422 const uint8_t *source = pm_string_source(contents);
6423
6424 for (size_t index = 0; index < length; index++) {
6425 if (source[index] & 0x80) return false;
6426 }
6427
6428 return true;
6429}
6430
6434static void
6435parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6436 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6437 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6438
6439 if (width == 0) {
6440 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6441 break;
6442 }
6443
6444 cursor += width;
6445 }
6446}
6447
6452static void
6453parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6454 const pm_encoding_t *encoding = parser->encoding;
6455
6456 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6457 size_t width = encoding->char_width(cursor, end - cursor);
6458
6459 if (width == 0) {
6460 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6461 break;
6462 }
6463
6464 cursor += width;
6465 }
6466}
6467
6477static inline pm_node_flags_t
6478parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6479 if (parser->explicit_encoding != NULL) {
6480 // A Symbol may optionally have its encoding explicitly set. This will
6481 // happen if an escape sequence results in a non-ASCII code point.
6482 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6483 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6484 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6485 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6486 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6487 } else if (validate) {
6488 parse_symbol_encoding_validate_other(parser, location, contents);
6489 }
6490 } else if (pm_ascii_only_p(contents)) {
6491 // Ruby stipulates that all source files must use an ASCII-compatible
6492 // encoding. Thus, all symbols appearing in source are eligible for
6493 // "downgrading" to US-ASCII.
6494 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6495 } else if (validate) {
6496 parse_symbol_encoding_validate_other(parser, location, contents);
6497 }
6498
6499 return 0;
6500}
6501
6502static pm_node_flags_t
6503parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6504 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6505 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6506 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6507 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6508
6509 // There's special validation logic used if a string does not contain any character escape sequences.
6510 if (parser->explicit_encoding == NULL) {
6511 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6512 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6513 // the US-ASCII encoding.
6514 if (ascii_only) {
6515 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6516 }
6517
6518 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6519 if (!ascii_only) {
6520 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6521 }
6522 } else if (parser->encoding != modifier_encoding) {
6523 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6524
6525 if (modifier == 'n' && !ascii_only) {
6526 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6527 }
6528 }
6529
6530 return flags;
6531 }
6532
6533 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
6534 bool mixed_encoding = false;
6535
6536 if (mixed_encoding) {
6537 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6538 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6539 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
6540 bool valid_string_in_modifier_encoding = true;
6541
6542 if (!valid_string_in_modifier_encoding) {
6543 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6544 }
6545 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6546 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6547 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6548 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6549 }
6550 }
6551
6552 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
6553 return flags;
6554}
6555
6562static pm_node_flags_t
6563parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6564 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
6565 bool valid_unicode_range = true;
6566 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6567 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6568 return flags;
6569 }
6570
6571 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6572 // to multi-byte characters are allowed.
6573 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6574 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6575 // following error message appearing twice. We do the same for compatibility.
6576 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6577 }
6578
6587 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6588 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6589 }
6590
6591 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6592 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6593 }
6594
6595 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6596 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6597 }
6598
6599 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6600 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6601 }
6602
6603 // At this point no encoding modifiers will be present on the regular expression as they would have already
6604 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6605 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6606 if (ascii_only) {
6607 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6608 }
6609
6610 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6611 // or by specifying a modifier.
6612 //
6613 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6614 if (parser->explicit_encoding != NULL) {
6615 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6616 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6617 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6618 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
6619 }
6620 }
6621
6622 return 0;
6623}
6624
6629static pm_symbol_node_t *
6630pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6631 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6632
6633 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6634 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6635
6636 *node = (pm_symbol_node_t) {
6637 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, ((pm_location_t) { .start = start, .length = U32(end - start) })),
6638 .opening_loc = NTOK2LOC(parser, opening),
6639 .value_loc = NTOK2LOC(parser, value),
6640 .closing_loc = NTOK2LOC(parser, closing),
6641 .unescaped = *unescaped
6642 };
6643
6644 return node;
6645}
6646
6650static inline pm_symbol_node_t *
6651pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6652 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6653}
6654
6658static pm_symbol_node_t *
6659pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6660 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6661 parser->current_string = PM_STRING_EMPTY;
6662 return node;
6663}
6664
6668static pm_symbol_node_t *
6669pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6670 assert(token->type == PM_TOKEN_LABEL);
6671
6672 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6673 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6674 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6675
6676 assert((label.end - label.start) >= 0);
6677 pm_string_shared_init(&node->unescaped, label.start, label.end);
6678 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6679
6680 return node;
6681}
6682
6686static pm_symbol_node_t *
6687pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6688 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6689
6690 *node = (pm_symbol_node_t) {
6691 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING, PM_LOCATION_INIT_UNSET),
6692 .value_loc = { 0 },
6693 .unescaped = { 0 }
6694 };
6695
6696 pm_string_constant_init(&node->unescaped, content, strlen(content));
6697 return node;
6698}
6699
6703static bool
6704pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6705 const pm_location_t *location = NULL;
6706
6707 switch (PM_NODE_TYPE(node)) {
6708 case PM_SYMBOL_NODE: {
6709 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6710 if (cast->closing_loc.length > 0) {
6711 location = &cast->closing_loc;
6712 }
6713 break;
6714 }
6715 case PM_INTERPOLATED_SYMBOL_NODE: {
6716 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6717 if (cast->closing_loc.length > 0) {
6718 location = &cast->closing_loc;
6719 }
6720 break;
6721 }
6722 default:
6723 return false;
6724 }
6725
6726 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6727}
6728
6732static pm_symbol_node_t *
6733pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6734 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
6735
6736 *new_node = (pm_symbol_node_t) {
6737 .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
6738 .opening_loc = TOK2LOC(parser, opening),
6739 .value_loc = node->content_loc,
6740 .closing_loc = TOK2LOC(parser, closing),
6741 .unescaped = node->unescaped
6742 };
6743
6744 pm_token_t content = {
6745 .type = PM_TOKEN_IDENTIFIER,
6746 .start = parser->start + node->content_loc.start,
6747 .end = parser->start + node->content_loc.start + node->content_loc.length
6748 };
6749
6750 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6751
6752 // We are explicitly _not_ using pm_node_destroy here because we don't want
6753 // to trash the unescaped string. We could instead copy the string if we
6754 // know that it is owned, but we're taking the fast path for now.
6755 xfree_sized(node, sizeof(pm_string_node_t));
6756
6757 return new_node;
6758}
6759
6763static pm_string_node_t *
6764pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6765 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
6766 pm_node_flags_t flags = 0;
6767
6768 switch (parser->frozen_string_literal) {
6769 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6770 flags = PM_STRING_FLAGS_MUTABLE;
6771 break;
6772 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6773 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6774 break;
6775 }
6776
6777 *new_node = (pm_string_node_t) {
6778 .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, PM_LOCATION_INIT_NODE(node)),
6779 .opening_loc = node->opening_loc,
6780 .content_loc = node->value_loc,
6781 .closing_loc = node->closing_loc,
6782 .unescaped = node->unescaped
6783 };
6784
6785 // We are explicitly _not_ using pm_node_destroy here because we don't want
6786 // to trash the unescaped string. We could instead copy the string if we
6787 // know that it is owned, but we're taking the fast path for now.
6788 xfree_sized(node, sizeof(pm_symbol_node_t));
6789
6790 return new_node;
6791}
6792
6796static pm_true_node_t *
6797pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6798 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6799 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6800
6801 *node = (pm_true_node_t) {
6802 .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token))
6803 };
6804
6805 return node;
6806}
6807
6811static pm_true_node_t *
6812pm_true_node_synthesized_create(pm_parser_t *parser) {
6813 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
6814
6815 *node = (pm_true_node_t) {
6816 .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET)
6817 };
6818
6819 return node;
6820}
6821
6825static pm_undef_node_t *
6826pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6827 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6828 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
6829
6830 *node = (pm_undef_node_t) {
6831 .base = PM_NODE_INIT(parser, PM_UNDEF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)),
6832 .keyword_loc = TOK2LOC(parser, token),
6833 .names = { 0 }
6834 };
6835
6836 return node;
6837}
6838
6842static void
6843pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
6844 PM_NODE_LENGTH_SET_NODE(node, name);
6845 pm_node_list_append(&node->names, name);
6846}
6847
6851static pm_unless_node_t *
6852pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6853 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6854
6855 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6856 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6857
6858 *node = (pm_unless_node_t) {
6859 .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end)),
6860 .keyword_loc = TOK2LOC(parser, keyword),
6861 .predicate = predicate,
6862 .then_keyword_loc = NTOK2LOC(parser, then_keyword),
6863 .statements = statements,
6864 .else_clause = NULL,
6865 .end_keyword_loc = { 0 }
6866 };
6867
6868 return node;
6869}
6870
6874static pm_unless_node_t *
6875pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6876 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6877 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
6878
6879 pm_statements_node_t *statements = pm_statements_node_create(parser);
6880 pm_statements_node_body_append(parser, statements, statement, true);
6881
6882 *node = (pm_unless_node_t) {
6883 .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)),
6884 .keyword_loc = TOK2LOC(parser, unless_keyword),
6885 .predicate = predicate,
6886 .then_keyword_loc = { 0 },
6887 .statements = statements,
6888 .else_clause = NULL,
6889 .end_keyword_loc = { 0 }
6890 };
6891
6892 return node;
6893}
6894
6895static inline void
6896pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6897 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6898 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6899}
6900
6906static void
6907pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6908 assert(parser->current_block_exits != NULL);
6909
6910 // All of the block exits that we want to remove should be within the
6911 // statements, and since we are modifying the statements, we shouldn't have
6912 // to check the end location.
6913 uint32_t start = statements->base.location.start;
6914
6915 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6916 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6917 if (block_exit->location.start < start) break;
6918
6919 // Implicitly remove from the list by lowering the size.
6920 parser->current_block_exits->size--;
6921 }
6922}
6923
6927static pm_until_node_t *
6928pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6929 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6930 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6931
6932 *node = (pm_until_node_t) {
6933 .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)),
6934 .keyword_loc = TOK2LOC(parser, keyword),
6935 .do_keyword_loc = NTOK2LOC(parser, do_keyword),
6936 .closing_loc = TOK2LOC(parser, closing),
6937 .predicate = predicate,
6938 .statements = statements
6939 };
6940
6941 return node;
6942}
6943
6947static pm_until_node_t *
6948pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6949 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
6950 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6951 pm_loop_modifier_block_exits(parser, statements);
6952
6953 *node = (pm_until_node_t) {
6954 .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)),
6955 .keyword_loc = TOK2LOC(parser, keyword),
6956 .do_keyword_loc = { 0 },
6957 .closing_loc = { 0 },
6958 .predicate = predicate,
6959 .statements = statements
6960 };
6961
6962 return node;
6963}
6964
6968static pm_when_node_t *
6969pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6970 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
6971
6972 *node = (pm_when_node_t) {
6973 .base = PM_NODE_INIT(parser, PM_WHEN_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)),
6974 .keyword_loc = TOK2LOC(parser, keyword),
6975 .statements = NULL,
6976 .then_keyword_loc = { 0 },
6977 .conditions = { 0 }
6978 };
6979
6980 return node;
6981}
6982
6986static void
6987pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
6988 PM_NODE_LENGTH_SET_NODE(node, condition);
6989 pm_node_list_append(&node->conditions, condition);
6990}
6991
6995static inline void
6996pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
6997 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
6998 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
6999}
7000
7004static void
7005pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7006 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
7007 PM_NODE_LENGTH_SET_NODE(node, statements);
7008 }
7009
7010 node->statements = statements;
7011}
7012
7016static pm_while_node_t *
7017pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7018 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7019 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7020
7021 *node = (pm_while_node_t) {
7022 .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)),
7023 .keyword_loc = TOK2LOC(parser, keyword),
7024 .do_keyword_loc = NTOK2LOC(parser, do_keyword),
7025 .closing_loc = TOK2LOC(parser, closing),
7026 .predicate = predicate,
7027 .statements = statements
7028 };
7029
7030 return node;
7031}
7032
7036static pm_while_node_t *
7037pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7038 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7039 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7040 pm_loop_modifier_block_exits(parser, statements);
7041
7042 *node = (pm_while_node_t) {
7043 .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)),
7044 .keyword_loc = TOK2LOC(parser, keyword),
7045 .do_keyword_loc = { 0 },
7046 .closing_loc = { 0 },
7047 .predicate = predicate,
7048 .statements = statements
7049 };
7050
7051 return node;
7052}
7053
7057static pm_while_node_t *
7058pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7059 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7060
7061 *node = (pm_while_node_t) {
7062 .base = PM_NODE_INIT(parser, PM_WHILE_NODE, 0, PM_LOCATION_INIT_UNSET),
7063 .keyword_loc = { 0 },
7064 .do_keyword_loc = { 0 },
7065 .closing_loc = { 0 },
7066 .predicate = predicate,
7067 .statements = statements
7068 };
7069
7070 return node;
7071}
7072
7077static pm_x_string_node_t *
7078pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7079 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7080
7081 *node = (pm_x_string_node_t) {
7082 .base = PM_NODE_INIT(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, PM_LOCATION_INIT_TOKENS(parser, opening, closing)),
7083 .opening_loc = TOK2LOC(parser, opening),
7084 .content_loc = TOK2LOC(parser, content),
7085 .closing_loc = TOK2LOC(parser, closing),
7086 .unescaped = *unescaped
7087 };
7088
7089 return node;
7090}
7091
7095static inline pm_x_string_node_t *
7096pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7097 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7098}
7099
7103static pm_yield_node_t *
7104pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7105 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7106
7107 uint32_t start = PM_TOKEN_START(parser, keyword);
7108 uint32_t end;
7109
7110 if (rparen_loc->length > 0) {
7111 end = PM_LOCATION_END(rparen_loc);
7112 } else if (arguments != NULL) {
7113 end = PM_NODE_END(arguments);
7114 } else if (lparen_loc->length > 0) {
7115 end = PM_LOCATION_END(lparen_loc);
7116 } else {
7117 end = PM_TOKEN_END(parser, keyword);
7118 }
7119
7120 *node = (pm_yield_node_t) {
7121 .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })),
7122 .keyword_loc = TOK2LOC(parser, keyword),
7123 .lparen_loc = *lparen_loc,
7124 .arguments = arguments,
7125 .rparen_loc = *rparen_loc
7126 };
7127
7128 return node;
7129}
7130
7135static int
7136pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7137 pm_scope_t *scope = parser->current_scope;
7138 int depth = 0;
7139
7140 while (scope != NULL) {
7141 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7142 if (scope->closed) break;
7143
7144 scope = scope->previous;
7145 depth++;
7146 }
7147
7148 return -1;
7149}
7150
7156static inline int
7157pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7158 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7159}
7160
7164static inline void
7165pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7166 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
7167}
7168
7172static pm_constant_id_t
7173pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7174 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
7175 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7176 return constant_id;
7177}
7178
7182static inline pm_constant_id_t
7183pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7184 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
7185}
7186
7190static inline pm_constant_id_t
7191pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7192 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
7193}
7194
7198static pm_constant_id_t
7199pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7200 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7201 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7202 return constant_id;
7203}
7204
7208static pm_constant_id_t
7209pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7210 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7211 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7212 return constant_id;
7213}
7214
7222static bool
7223pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7224 // We want to check whether the parameter name is a numbered parameter or
7225 // not.
7226 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7227
7228 // Otherwise we'll fetch the constant id for the parameter name and check
7229 // whether it's already in the current scope.
7230 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7231
7232 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7233 // Add an error if the parameter doesn't start with _ and has been seen before
7234 if ((name->start < name->end) && (*name->start != '_')) {
7235 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7236 }
7237 return true;
7238 }
7239 return false;
7240}
7241
7245static void
7246pm_parser_scope_pop(pm_parser_t *parser) {
7247 pm_scope_t *scope = parser->current_scope;
7248 parser->current_scope = scope->previous;
7249 pm_locals_free(&scope->locals);
7250 pm_node_list_free(&scope->implicit_parameters);
7251 xfree_sized(scope, sizeof(pm_scope_t));
7252}
7253
7254/******************************************************************************/
7255/* Stack helpers */
7256/******************************************************************************/
7257
7261static inline void
7262pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7263 *stack = (*stack << 1) | (value & 1);
7264}
7265
7269static inline void
7270pm_state_stack_pop(pm_state_stack_t *stack) {
7271 *stack >>= 1;
7272}
7273
7277static inline bool
7278pm_state_stack_p(const pm_state_stack_t *stack) {
7279 return *stack & 1;
7280}
7281
7282static inline void
7283pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7284 // Use the negation of the value to prevent stack overflow.
7285 pm_state_stack_push(&parser->accepts_block_stack, !value);
7286}
7287
7288static inline void
7289pm_accepts_block_stack_pop(pm_parser_t *parser) {
7290 pm_state_stack_pop(&parser->accepts_block_stack);
7291}
7292
7293static inline bool
7294pm_accepts_block_stack_p(pm_parser_t *parser) {
7295 return !pm_state_stack_p(&parser->accepts_block_stack);
7296}
7297
7298static inline void
7299pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7300 pm_state_stack_push(&parser->do_loop_stack, value);
7301}
7302
7303static inline void
7304pm_do_loop_stack_pop(pm_parser_t *parser) {
7305 pm_state_stack_pop(&parser->do_loop_stack);
7306}
7307
7308static inline bool
7309pm_do_loop_stack_p(pm_parser_t *parser) {
7310 return pm_state_stack_p(&parser->do_loop_stack);
7311}
7312
7313/******************************************************************************/
7314/* Lexer check helpers */
7315/******************************************************************************/
7316
7321static inline uint8_t
7322peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7323 if (cursor < parser->end) {
7324 return *cursor;
7325 } else {
7326 return '\0';
7327 }
7328}
7329
7335static inline uint8_t
7336peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7337 return peek_at(parser, parser->current.end + offset);
7338}
7339
7344static inline uint8_t
7345peek(const pm_parser_t *parser) {
7346 return peek_at(parser, parser->current.end);
7347}
7348
7353static inline bool
7354match(pm_parser_t *parser, uint8_t value) {
7355 if (peek(parser) == value) {
7356 parser->current.end++;
7357 return true;
7358 }
7359 return false;
7360}
7361
7366static inline size_t
7367match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7368 if (peek_at(parser, cursor) == '\n') {
7369 return 1;
7370 }
7371 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7372 return 2;
7373 }
7374 return 0;
7375}
7376
7382static inline size_t
7383match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7384 return match_eol_at(parser, parser->current.end + offset);
7385}
7386
7392static inline size_t
7393match_eol(pm_parser_t *parser) {
7394 return match_eol_at(parser, parser->current.end);
7395}
7396
/**
 * Return a pointer to the first "\n" within the `length` bytes starting at
 * the cursor, or NULL if there is none. The length must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: none of the supported encodings
    // use \n as one of the bytes of a multi-byte character.
    return memchr(cursor, '\n', span);
}
7409
7413static inline bool
7414ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7415 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7416}
7417
7422static bool
7423parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7424 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7425
7426 if (encoding != NULL) {
7427 if (parser->encoding != encoding) {
7428 parser->encoding = encoding;
7429 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7430 }
7431
7432 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7433 return true;
7434 }
7435
7436 return false;
7437}
7438
7443static void
7444parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7445 const uint8_t *cursor = parser->current.start + 1;
7446 const uint8_t *end = parser->current.end;
7447
7448 bool separator = false;
7449 while (true) {
7450 if (end - cursor <= 6) return;
7451 switch (cursor[6]) {
7452 case 'C': case 'c': cursor += 6; continue;
7453 case 'O': case 'o': cursor += 5; continue;
7454 case 'D': case 'd': cursor += 4; continue;
7455 case 'I': case 'i': cursor += 3; continue;
7456 case 'N': case 'n': cursor += 2; continue;
7457 case 'G': case 'g': cursor += 1; continue;
7458 case '=': case ':':
7459 separator = true;
7460 cursor += 6;
7461 break;
7462 default:
7463 cursor += 6;
7464 if (pm_char_is_whitespace(*cursor)) break;
7465 continue;
7466 }
7467 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7468 separator = false;
7469 }
7470
7471 while (true) {
7472 do {
7473 if (++cursor >= end) return;
7474 } while (pm_char_is_whitespace(*cursor));
7475
7476 if (separator) break;
7477 if (*cursor != '=' && *cursor != ':') return;
7478
7479 separator = true;
7480 cursor++;
7481 }
7482
7483 const uint8_t *value_start = cursor;
7484 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7485
7486 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7487 // If we were unable to parse the encoding value, then we've got an
7488 // issue because we didn't understand the encoding that the user was
7489 // trying to use. In this case we'll keep using the default encoding but
7490 // add an error to the parser to indicate an unsuccessful parse.
7491 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7492 }
7493}
7494
/**
 * The possible outcomes of interpreting the value of a magic comment that is
 * expected to be a boolean.
 */
typedef enum {
    /** The value was "true". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
7500
7505static pm_magic_comment_boolean_value_t
7506parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7507 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7508 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7509 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7510 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7511 } else {
7512 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7513 }
7514}
7515
/**
 * Return whether the given byte delimits a magic comment key: a single quote,
 * a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
7520
7526static inline const uint8_t *
7527parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7528 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
7529 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
7530 return cursor;
7531 }
7532 cursor++;
7533 }
7534 return NULL;
7535}
7536
7547static inline bool
7548parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7549 bool result = true;
7550
7551 const uint8_t *start = parser->current.start + 1;
7552 const uint8_t *end = parser->current.end;
7553 if (end - start <= 7) return false;
7554
7555 const uint8_t *cursor;
7556 bool indicator = false;
7557
7558 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7559 start = cursor + 3;
7560
7561 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7562 end = cursor;
7563 indicator = true;
7564 } else {
7565 // If we have a start marker but not an end marker, then we cannot
7566 // have a magic comment.
7567 return false;
7568 }
7569 }
7570
7571 cursor = start;
7572 while (cursor < end) {
7573 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7574
7575 const uint8_t *key_start = cursor;
7576 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7577
7578 const uint8_t *key_end = cursor;
7579 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7580 if (cursor == end) break;
7581
7582 if (*cursor == ':') {
7583 cursor++;
7584 } else {
7585 if (!indicator) return false;
7586 continue;
7587 }
7588
7589 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7590 if (cursor == end) break;
7591
7592 const uint8_t *value_start;
7593 const uint8_t *value_end;
7594
7595 if (*cursor == '"') {
7596 value_start = ++cursor;
7597 for (; cursor < end && *cursor != '"'; cursor++) {
7598 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7599 }
7600 value_end = cursor;
7601 if (cursor < end && *cursor == '"') cursor++;
7602 } else {
7603 value_start = cursor;
7604 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7605 value_end = cursor;
7606 }
7607
7608 if (indicator) {
7609 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7610 } else {
7611 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7612 if (cursor != end) return false;
7613 }
7614
7615 // Here, we need to do some processing on the key to swap out dashes for
7616 // underscores. We only need to do this if there _is_ a dash in the key.
7617 pm_string_t key;
7618 const size_t key_length = (size_t) (key_end - key_start);
7619 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7620
7621 if (dash == NULL) {
7622 pm_string_shared_init(&key, key_start, key_end);
7623 } else {
7624 uint8_t *buffer = xmalloc(key_length);
7625 if (buffer == NULL) break;
7626
7627 memcpy(buffer, key_start, key_length);
7628 buffer[dash - key_start] = '_';
7629
7630 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7631 buffer[dash - key_start] = '_';
7632 }
7633
7634 pm_string_owned_init(&key, buffer, key_length);
7635 }
7636
7637 // Finally, we can start checking the key against the list of known
7638 // magic comment keys, and potentially change state based on that.
7639 const uint8_t *key_source = pm_string_source(&key);
7640 uint32_t value_length = (uint32_t) (value_end - value_start);
7641
7642 // We only want to attempt to compare against encoding comments if it's
7643 // the first line in the file (or the second in the case of a shebang).
7644 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7645 if (
7646 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7647 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7648 ) {
7649 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7650 }
7651 }
7652
7653 if (key_length == 11) {
7654 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7655 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7656 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7657 PM_PARSER_WARN_TOKEN_FORMAT(
7658 parser,
7659 &parser->current,
7660 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7661 (int) key_length,
7662 (const char *) key_source,
7663 (int) value_length,
7664 (const char *) value_start
7665 );
7666 break;
7667 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7668 parser->warn_mismatched_indentation = false;
7669 break;
7670 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7671 parser->warn_mismatched_indentation = true;
7672 break;
7673 }
7674 }
7675 } else if (key_length == 21) {
7676 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7677 // We only want to handle frozen string literal comments if it's
7678 // before any semantic tokens have been seen.
7679 if (semantic_token_seen) {
7680 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7681 } else {
7682 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7683 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7684 PM_PARSER_WARN_TOKEN_FORMAT(
7685 parser,
7686 &parser->current,
7687 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7688 (int) key_length,
7689 (const char *) key_source,
7690 (int) value_length,
7691 (const char *) value_start
7692 );
7693 break;
7694 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7696 break;
7697 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7699 break;
7700 }
7701 }
7702 }
7703 } else if (key_length == 24) {
7704 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7705 const uint8_t *cursor = parser->current.start;
7706 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7707
7708 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7709 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7710 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7711 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7712 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7713 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7714 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7715 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7716 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7717 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7718 } else {
7719 PM_PARSER_WARN_TOKEN_FORMAT(
7720 parser,
7721 &parser->current,
7722 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7723 (int) key_length,
7724 (const char *) key_source,
7725 (int) value_length,
7726 (const char *) value_start
7727 );
7728 }
7729 }
7730 }
7731
7732 // When we're done, we want to free the string in case we had to
7733 // allocate memory for it.
7734 pm_string_free(&key);
7735
7736 // Allocate a new magic comment node to append to the parser's list.
7738 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
7739 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7740 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7741 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7742 }
7743 }
7744
7745 return result;
7746}
7747
7748/******************************************************************************/
7749/* Context manipulations */
7750/******************************************************************************/
7751
7752static const uint32_t context_terminators[] = {
7753 [PM_CONTEXT_NONE] = 0,
7754 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7755 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7756 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7757 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7758 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7759 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7760 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7761 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7762 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7763 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7764 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7765 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7766 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7767 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7768 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7769 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7770 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7771 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7772 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7773 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7774 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7775 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7776 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7777 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7778 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7779 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7780 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7781 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7782 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7783 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7784 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7785 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7786 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7787 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7788 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7789 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7790 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7791 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7792 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7793 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7794 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7795 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7796 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7797 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7798 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7799 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7800 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7801 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7802 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7803 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7804 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7805 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7806 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7807 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7808};
7809
7810static inline bool
7811context_terminator(pm_context_t context, pm_token_t *token) {
7812 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7813}
7814
7819static pm_context_t
7820context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7821 pm_context_node_t *context_node = parser->current_context;
7822
7823 while (context_node != NULL) {
7824 if (context_terminator(context_node->context, token)) return context_node->context;
7825 context_node = context_node->prev;
7826 }
7827
7828 return PM_CONTEXT_NONE;
7829}
7830
7831static bool
7832context_push(pm_parser_t *parser, pm_context_t context) {
7833 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7834 if (context_node == NULL) return false;
7835
7836 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7837
7838 if (parser->current_context == NULL) {
7839 parser->current_context = context_node;
7840 } else {
7841 context_node->prev = parser->current_context;
7842 parser->current_context = context_node;
7843 }
7844
7845 return true;
7846}
7847
7848static void
7849context_pop(pm_parser_t *parser) {
7850 pm_context_node_t *prev = parser->current_context->prev;
7851 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7852 parser->current_context = prev;
7853}
7854
7855static bool
7856context_p(const pm_parser_t *parser, pm_context_t context) {
7857 pm_context_node_t *context_node = parser->current_context;
7858
7859 while (context_node != NULL) {
7860 if (context_node->context == context) return true;
7861 context_node = context_node->prev;
7862 }
7863
7864 return false;
7865}
7866
/**
 * Walks the context stack from the parser's current context through the prev
 * links and reports whether a method definition context is reached before a
 * class, module, or singleton class context. Returns false if the stack is
 * exhausted without finding either.
 */
7867static bool
7868context_def_p(const pm_parser_t *parser) {
7869 pm_context_node_t *context_node = parser->current_context;
7870
7871 while (context_node != NULL) {
7872 switch (context_node->context) {
// NOTE(review): the embedded line numbers jump after several of the case
// labels below (7873 -> 7878, 7879 -> 7883, 7883 -> 7887, 7887 -> 7891), so
// additional labels sharing these results are probably missing from this
// listing -- confirm against the full file.
7873 case PM_CONTEXT_DEF:
7878 return true;
7879 case PM_CONTEXT_CLASS:
7883 case PM_CONTEXT_MODULE:
7887 case PM_CONTEXT_SCLASS:
// These contexts stop the search with a negative answer instead of letting
// it continue outward to an enclosing method definition.
7891 return false;
7892 default:
// Any other context is transparent: keep looking at the enclosing context.
7893 context_node = context_node->prev;
7894 }
7895 }
7896
7897 return false;
7898}
7899
/**
 * Returns a human-readable description of the given context as a static
 * string. PM_CONTEXT_NONE is not expected here: it trips an assertion and,
 * when assertions are compiled out, yields the empty string. The same applies
 * to any value that is not handled by the switch.
 */
7904static const char *
7905context_human(pm_context_t context) {
7906 switch (context) {
7907 case PM_CONTEXT_NONE:
7908 assert(false && "unreachable");
7909 return "";
7910 case PM_CONTEXT_BEGIN: return "begin statement";
7911 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
7912 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
7913 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
7914 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
7915 case PM_CONTEXT_CASE_IN: return "'in' clause";
7916 case PM_CONTEXT_CLASS: return "class definition";
7917 case PM_CONTEXT_DEF: return "method definition";
7918 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
7919 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
7920 case PM_CONTEXT_DEFINED: return "'defined?' expression";
// NOTE(review): the embedded line numbers jump here (7921 -> 7928), so more
// labels sharing the "'else' clause" description are probably missing from
// this listing -- confirm against the full file.
7921 case PM_CONTEXT_ELSE:
7928 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
7929 case PM_CONTEXT_ELSIF: return "'elsif' clause";
7930 case PM_CONTEXT_EMBEXPR: return "embedded expression";
// NOTE(review): numbering jumps again (7930 -> 7937); other labels sharing
// the "'ensure' clause" description are presumably elided.
7937 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
7938 case PM_CONTEXT_FOR: return "for loop";
7939 case PM_CONTEXT_FOR_INDEX: return "for loop index";
7940 case PM_CONTEXT_IF: return "if statement";
7941 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
7942 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
7943 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
7944 case PM_CONTEXT_MAIN: return "top level context";
7945 case PM_CONTEXT_MODULE: return "module definition";
7946 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
7947 case PM_CONTEXT_PARENS: return "parentheses";
7948 case PM_CONTEXT_POSTEXE: return "'END' block";
7949 case PM_CONTEXT_PREDICATE: return "predicate";
7950 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
// NOTE(review): numbering jumps (7950 -> 7958); other labels sharing the
// "'rescue' clause" description are presumably elided.
7958 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
7959 case PM_CONTEXT_SCLASS: return "singleton class definition";
7960 case PM_CONTEXT_TERNARY: return "ternary expression";
7961 case PM_CONTEXT_UNLESS: return "unless statement";
7962 case PM_CONTEXT_UNTIL: return "until statement";
7963 case PM_CONTEXT_WHILE: return "while statement";
7964 }
7965
// Reached only if the value matched none of the cases above.
7966 assert(false && "unreachable");
7967 return "";
7968}
7969
7970/******************************************************************************/
7971/* Specific token lexers */
7972/******************************************************************************/
7973
7974static inline void
7975pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
7976 if (invalid != NULL) {
7977 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7978 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
7979 }
7980}
7981
7982static size_t
7983pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
7984 const uint8_t *invalid = NULL;
7985 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
7986 pm_strspn_number_validate(parser, string, length, invalid);
7987 return length;
7988}
7989
7990static size_t
7991pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
7992 const uint8_t *invalid = NULL;
7993 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
7994 pm_strspn_number_validate(parser, string, length, invalid);
7995 return length;
7996}
7997
7998static size_t
7999pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8000 const uint8_t *invalid = NULL;
8001 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8002 pm_strspn_number_validate(parser, string, length, invalid);
8003 return length;
8004}
8005
8006static size_t
8007pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8008 const uint8_t *invalid = NULL;
8009 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8010 pm_strspn_number_validate(parser, string, length, invalid);
8011 return length;
8012}
8013
8014static pm_token_type_t
8015lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8016 pm_token_type_t type = PM_TOKEN_INTEGER;
8017
8018 // Here we're going to attempt to parse the optional decimal portion of a
8019 // float. If it's not there, then it's okay and we'll just continue on.
8020 if (peek(parser) == '.') {
8021 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8022 parser->current.end += 2;
8023 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8024 type = PM_TOKEN_FLOAT;
8025 } else {
8026 // If we had a . and then something else, then it's not a float
8027 // suffix on a number it's a method call or something else.
8028 return type;
8029 }
8030 }
8031
8032 // Here we're going to attempt to parse the optional exponent portion of a
8033 // float. If it's not there, it's okay and we'll just continue on.
8034 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8035 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8036 parser->current.end += 2;
8037
8038 if (pm_char_is_decimal_digit(peek(parser))) {
8039 parser->current.end++;
8040 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8041 } else {
8042 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8043 }
8044 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8045 parser->current.end++;
8046 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8047 } else {
8048 return type;
8049 }
8050
8051 *seen_e = true;
8052 type = PM_TOKEN_FLOAT;
8053 }
8054
8055 return type;
8056}
8057
8058static pm_token_type_t
8059lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8060 pm_token_type_t type = PM_TOKEN_INTEGER;
8061 *seen_e = false;
8062
8063 if (peek_offset(parser, -1) == '0') {
8064 switch (*parser->current.end) {
8065 // 0d1111 is a decimal number
8066 case 'd':
8067 case 'D':
8068 parser->current.end++;
8069 if (pm_char_is_decimal_digit(peek(parser))) {
8070 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8071 } else {
8072 match(parser, '_');
8073 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8074 }
8075
8076 break;
8077
8078 // 0b1111 is a binary number
8079 case 'b':
8080 case 'B':
8081 parser->current.end++;
8082 if (pm_char_is_binary_digit(peek(parser))) {
8083 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8084 } else {
8085 match(parser, '_');
8086 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8087 }
8088
8089 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
8090 break;
8091
8092 // 0o1111 is an octal number
8093 case 'o':
8094 case 'O':
8095 parser->current.end++;
8096 if (pm_char_is_octal_digit(peek(parser))) {
8097 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8098 } else {
8099 match(parser, '_');
8100 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8101 }
8102
8103 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8104 break;
8105
8106 // 01111 is an octal number
8107 case '_':
8108 case '0':
8109 case '1':
8110 case '2':
8111 case '3':
8112 case '4':
8113 case '5':
8114 case '6':
8115 case '7':
8116 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8117 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
8118 break;
8119
8120 // 0x1111 is a hexadecimal number
8121 case 'x':
8122 case 'X':
8123 parser->current.end++;
8124 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8125 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8126 } else {
8127 match(parser, '_');
8128 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8129 }
8130
8131 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8132 break;
8133
8134 // 0.xxx is a float
8135 case '.': {
8136 type = lex_optional_float_suffix(parser, seen_e);
8137 break;
8138 }
8139
8140 // 0exxx is a float
8141 case 'e':
8142 case 'E': {
8143 type = lex_optional_float_suffix(parser, seen_e);
8144 break;
8145 }
8146 }
8147 } else {
8148 // If it didn't start with a 0, then we'll lex as far as we can into a
8149 // decimal number.
8150 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8151
8152 // Afterward, we'll lex as far as we can into an optional float suffix.
8153 type = lex_optional_float_suffix(parser, seen_e);
8154 }
8155
8156 // At this point we have a completed number, but we want to provide the user
8157 // with a good experience if they put an additional .xxx fractional
8158 // component on the end, so we'll check for that here.
8159 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8160 const uint8_t *fraction_start = parser->current.end;
8161 const uint8_t *fraction_end = parser->current.end + 2;
8162 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8163 pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
8164 }
8165
8166 return type;
8167}
8168
8169static pm_token_type_t
8170lex_numeric(pm_parser_t *parser) {
8171 pm_token_type_t type = PM_TOKEN_INTEGER;
8172 parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8173
8174 if (parser->current.end < parser->end) {
8175 bool seen_e = false;
8176 type = lex_numeric_prefix(parser, &seen_e);
8177
8178 const uint8_t *end = parser->current.end;
8179 pm_token_type_t suffix_type = type;
8180
8181 if (type == PM_TOKEN_INTEGER) {
8182 if (match(parser, 'r')) {
8183 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8184
8185 if (match(parser, 'i')) {
8186 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8187 }
8188 } else if (match(parser, 'i')) {
8189 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8190 }
8191 } else {
8192 if (!seen_e && match(parser, 'r')) {
8193 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8194
8195 if (match(parser, 'i')) {
8196 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8197 }
8198 } else if (match(parser, 'i')) {
8199 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8200 }
8201 }
8202
8203 const uint8_t b = peek(parser);
8204 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8205 parser->current.end = end;
8206 } else {
8207 type = suffix_type;
8208 }
8209 }
8210
8211 return type;
8212}
8213
/**
 * Lexes the name portion of a global variable, dispatching on the byte at
 * parser->current.end, and advances parser->current.end past the name.
 *
 * Returns PM_TOKEN_GLOBAL_VARIABLE in most cases. The $&, $`, $' and $+ forms
 * return PM_TOKEN_BACK_REFERENCE and the $1..$9 forms return
 * PM_TOKEN_NUMBERED_REFERENCE, unless the lexer is in the FNAME state, in
 * which case they are also plain global variables. Malformed names add an
 * error to the parser but still return PM_TOKEN_GLOBAL_VARIABLE.
 */
8214static pm_token_type_t
8215lex_global_variable(pm_parser_t *parser) {
// Nothing follows in the source: report a bare global variable.
8216 if (parser->current.end >= parser->end) {
8217 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8218 return PM_TOKEN_GLOBAL_VARIABLE;
8219 }
8220
8221 // True if multiple characters are allowed after the declaration of the
8222 // global variable. Not true when it starts with "$-".
8223 bool allow_multiple = true;
8224
8225 switch (*parser->current.end) {
8226 case '~': // $~: match-data
8227 case '*': // $*: argv
8228 case '$': // $$: pid
8229 case '?': // $?: last status
8230 case '!': // $!: error string
8231 case '@': // $@: error position
8232 case '/': // $/: input record separator
8233 case '\\': // $\: output record separator
8234 case ';': // $;: field separator
8235 case ',': // $,: output field separator
8236 case '.': // $.: last read line number
8237 case '=': // $=: ignorecase
8238 case ':': // $:: load path
8239 case '<': // $<: reading filename
8240 case '>': // $>: default output handle
8241 case '\"': // $": already loaded files
8242 parser->current.end++;
8243 return PM_TOKEN_GLOBAL_VARIABLE;
8244
8245 case '&': // $&: last match
8246 case '`': // $`: string before last match
8247 case '\'': // $': string after last match
8248 case '+': // $+: string matches last paren.
8249 parser->current.end++;
8250 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8251
8252 case '0': {
8253 parser->current.end++;
8254 size_t width;
8255
// Consume any identifier characters that trail the 0 so that the error
// below covers the whole malformed name.
8256 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8257 do {
8258 parser->current.end += width;
8259 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8260
8261 // $0 isn't allowed to be followed by anything.
8262 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8263 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8264 }
8265
8266 return PM_TOKEN_GLOBAL_VARIABLE;
8267 }
8268
8269 case '1':
8270 case '2':
8271 case '3':
8272 case '4':
8273 case '5':
8274 case '6':
8275 case '7':
8276 case '8':
8277 case '9':
8278 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8279 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8280
8281 case '-':
8282 parser->current.end++;
8283 allow_multiple = false;
// Control continues into the default case so that the character after "$-"
// is lexed there, limited to a single identifier character.
// NOTE(review): the embedded line numbers skip 8284 here, so an explicit
// fallthrough marker is presumably elided from this listing -- confirm.
8285 default: {
8286 size_t width;
8287
8288 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8289 do {
8290 parser->current.end += width;
8291 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8292 } else if (pm_char_is_whitespace(peek(parser))) {
8293 // If we get here, then we have a $ followed by whitespace,
8294 // which is not allowed.
8295 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8296 } else {
8297 // If we get here, then we have a $ followed by something that
8298 // isn't recognized as a global variable.
8299 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
// This declaration shadows the `width` declared at the top of the default
// block. The error span and the printed content both cover the token lexed
// so far plus the one offending character that follows it.
8300 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8301 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8302 }
8303
8304 return PM_TOKEN_GLOBAL_VARIABLE;
8305 }
8306 }
8307}
8308
8321static inline pm_token_type_t
8322lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8323 if (memcmp(current_start, value, vlen) == 0) {
8324 pm_lex_state_t last_state = parser->lex_state;
8325
8326 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8327 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8328 } else {
8329 lex_state_set(parser, state);
8330 if (state == PM_LEX_STATE_BEG) {
8331 parser->command_start = true;
8332 }
8333
8334 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8335 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8336 return modifier_type;
8337 }
8338 }
8339
8340 return type;
8341 }
8342
8343 return PM_TOKEN_EOF;
8344}
8345
/**
 * Lexes an identifier-like token starting at parser->current.start and
 * classifies it. parser->current.end is advanced over the identifier
 * characters and, where applicable, over a trailing '!', '?', '=', or ':'.
 *
 * Possible results: PM_TOKEN_LABEL (name followed by a single ':' in a
 * position that accepts labels), PM_TOKEN_METHOD_NAME (name ending in '!' or
 * '?'), PM_TOKEN_KEYWORD_DEFINED, any other keyword token (including the
 * modifier forms chosen by lex_keyword), PM_TOKEN_IDENTIFIER, or
 * PM_TOKEN_CONSTANT when the first character is uppercase. Keywords are not
 * recognized when the lexer state is exactly PM_LEX_STATE_DOT.
 *
 * `previous_command_start` suppresses label recognition in the LABEL/ENDFN
 * states when it is true.
 */
8346static pm_token_type_t
8347lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8348 // Lex as far as we can into the current identifier.
8349 size_t width;
8350 const uint8_t *end = parser->end;
8351 const uint8_t *current_start = parser->current.start;
8352 const uint8_t *current_end = parser->current.end;
8353 bool encoding_changed = parser->encoding_changed;
8354
// The UTF-8 specific scanner is used unless the parser has recorded an
// encoding change, in which case the generic encoding-aware one is used.
8355 if (encoding_changed) {
8356 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8357 current_end += width;
8358 }
8359 } else {
8360 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8361 current_end += width;
8362 }
8363 }
8364 parser->current.end = current_end;
8365
8366 // Now cache the length of the identifier so that we can quickly compare it
8367 // against known keywords.
8368 width = (size_t) (current_end - current_start);
8369
8370 if (current_end < end) {
// A trailing '!' or '?' is only consumed when the byte after it is not '=',
// so that sequences such as "foo!=" are not lexed as the method name "foo!".
8371 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8372 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8373 // check if we're returning the defined? keyword or just an identifier.
8374 width++;
8375
8376 if (
8377 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8378 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8379 ) {
8380 // If we're in a position where we can accept a : at the end of an
8381 // identifier, then we'll optionally accept it.
8382 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8383 (void) match(parser, ':');
8384 return PM_TOKEN_LABEL;
8385 }
8386
8387 if (parser->lex_state != PM_LEX_STATE_DOT) {
8388 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8389 return PM_TOKEN_KEYWORD_DEFINED;
8390 }
8391 }
8392
8393 return PM_TOKEN_METHOD_NAME;
8394 }
8395
// In the FNAME state a trailing '=' is consumed as part of the name, unless
// it begins "=~", "=>", or "==" (where "==>" is still accepted).
8396 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8397 // If we're in a position where we can accept a = at the end of an
8398 // identifier, then we'll optionally accept it.
8399 return PM_TOKEN_IDENTIFIER;
8400 }
8401
8402 if (
8403 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8404 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8405 ) {
8406 // If we're in a position where we can accept a : at the end of an
8407 // identifier, then we'll optionally accept it.
8408 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8409 (void) match(parser, ':');
8410 return PM_TOKEN_LABEL;
8411 }
8412 }
8413
// Keyword recognition. Candidates are grouped by length so that only
// keywords of the right size are compared. lex_keyword returns PM_TOKEN_EOF
// for "no match" and updates the lexer state itself when it does match.
8414 if (parser->lex_state != PM_LEX_STATE_DOT) {
8415 pm_token_type_t type;
8416 switch (width) {
8417 case 2:
// "do" is special-cased: the token depends on the do-loop stack rather than
// on a modifier form.
8418 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8419 if (pm_do_loop_stack_p(parser)) {
8420 return PM_TOKEN_KEYWORD_DO_LOOP;
8421 }
8422 return PM_TOKEN_KEYWORD_DO;
8423 }
8424
8425 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8426 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8427 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8428 break;
8429 case 3:
8430 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8431 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8432 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8433 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8434 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8435 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8436 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8437 break;
8438 case 4:
8439 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8440 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8441 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8442 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8443 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8444 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8445 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8446 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8447 break;
8448 case 5:
8449 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8450 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8451 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8452 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8453 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8454 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8455 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8456 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8457 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8458 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8459 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8460 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8461 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8462 break;
8463 case 6:
8464 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8465 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8466 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8467 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8468 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8469 break;
8470 case 8:
8471 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8472 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8473 break;
8474 case 12:
8475 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8476 break;
8477 }
8478 }
8479
// Not a keyword: an uppercase first character makes it a constant, anything
// else a plain identifier. The uppercase check mirrors the scanner choice
// made at the top of the function.
8480 if (encoding_changed) {
8481 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8482 }
8483 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8484}
8485
8490static bool
8491current_token_starts_line(pm_parser_t *parser) {
8492 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8493}
8494
8509static pm_token_type_t
8510lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8511 // If there is no content following this #, then we're at the end of
8512 // the string and we can safely return string content.
8513 if (pound + 1 >= parser->end) {
8514 parser->current.end = pound + 1;
8515 return PM_TOKEN_STRING_CONTENT;
8516 }
8517
8518 // Now we'll check against the character that follows the #. If it
8519 // constitutes valid interplation, we'll handle that, otherwise we'll return
8520 // 0.
8521 switch (pound[1]) {
8522 case '@': {
8523 // In this case we may have hit an embedded instance or class variable.
8524 if (pound + 2 >= parser->end) {
8525 parser->current.end = pound + 1;
8526 return PM_TOKEN_STRING_CONTENT;
8527 }
8528
8529 // If we're looking at a @ and there's another @, then we'll skip past the
8530 // second @.
8531 const uint8_t *variable = pound + 2;
8532 if (*variable == '@' && pound + 3 < parser->end) variable++;
8533
8534 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8535 // At this point we're sure that we've either hit an embedded instance
8536 // or class variable. In this case we'll first need to check if we've
8537 // already consumed content.
8538 if (pound > parser->current.start) {
8539 parser->current.end = pound;
8540 return PM_TOKEN_STRING_CONTENT;
8541 }
8542
8543 // Otherwise we need to return the embedded variable token
8544 // and then switch to the embedded variable lex mode.
8545 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8546 parser->current.end = pound + 1;
8547 return PM_TOKEN_EMBVAR;
8548 }
8549
8550 // If we didn't get a valid interpolation, then this is just regular
8551 // string content. This is like if we get "#@-". In this case the caller
8552 // should keep lexing.
8553 parser->current.end = pound + 1;
8554 return 0;
8555 }
8556 case '$':
8557 // In this case we may have hit an embedded global variable. If there's
8558 // not enough room, then we'll just return string content.
8559 if (pound + 2 >= parser->end) {
8560 parser->current.end = pound + 1;
8561 return PM_TOKEN_STRING_CONTENT;
8562 }
8563
8564 // This is the character that we're going to check to see if it is the
8565 // start of an identifier that would indicate that this is a global
8566 // variable.
8567 const uint8_t *check = pound + 2;
8568
8569 if (pound[2] == '-') {
8570 if (pound + 3 >= parser->end) {
8571 parser->current.end = pound + 2;
8572 return PM_TOKEN_STRING_CONTENT;
8573 }
8574
8575 check++;
8576 }
8577
8578 // If the character that we're going to check is the start of an
8579 // identifier, or we don't have a - and the character is a decimal number
8580 // or a global name punctuation character, then we've hit an embedded
8581 // global variable.
8582 if (
8583 char_is_identifier_start(parser, check, parser->end - check) ||
8584 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8585 ) {
8586 // In this case we've hit an embedded global variable. First check to
8587 // see if we've already consumed content. If we have, then we need to
8588 // return that content as string content first.
8589 if (pound > parser->current.start) {
8590 parser->current.end = pound;
8591 return PM_TOKEN_STRING_CONTENT;
8592 }
8593
8594 // Otherwise, we need to return the embedded variable token and switch
8595 // to the embedded variable lex mode.
8596 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8597 parser->current.end = pound + 1;
8598 return PM_TOKEN_EMBVAR;
8599 }
8600
8601 // In this case we've hit a #$ that does not indicate a global variable.
8602 // In this case we'll continue lexing past it.
8603 parser->current.end = pound + 1;
8604 return 0;
8605 case '{':
8606 // In this case it's the start of an embedded expression. If we have
8607 // already consumed content, then we need to return that content as string
8608 // content first.
8609 if (pound > parser->current.start) {
8610 parser->current.end = pound;
8611 return PM_TOKEN_STRING_CONTENT;
8612 }
8613
8614 parser->enclosure_nesting++;
8615
8616 // Otherwise we'll skip past the #{ and begin lexing the embedded
8617 // expression.
8618 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8619 parser->current.end = pound + 2;
8620 parser->command_start = true;
8621 pm_do_loop_stack_push(parser, false);
8622 return PM_TOKEN_EMBEXPR_BEGIN;
8623 default:
8624 // In this case we've hit a # that doesn't constitute interpolation. We'll
8625 // mark that by returning the not provided token type. This tells the
8626 // consumer to keep lexing forward.
8627 parser->current.end = pound + 1;
8628 return 0;
8629 }
8630}
8631
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with bitwise OR and tested with bitwise AND.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;
// Set while reading the operand of a \c or \C- escape.
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;
// Set while reading the operand of a \M- escape.
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;
// Set when the escape belongs to a ?x character literal.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;
// Set when the escape must also be mirrored into a regular expression buffer.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
8637
/**
 * Lookup table indexed by a 7-bit byte value. An entry is true for the bytes
 * 0x09 through 0x0D and for 0x20 through 0x7E, with the single exception of
 * the backslash (0x5C), which is false.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is below 0x80 and is marked in the
 * ascii_printable_chars table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    // Bytes with the high bit set are outside of the table.
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
8656
/**
 * Convert an ASCII hexadecimal digit into its numeric value. Bytes up to '9'
 * are treated as decimal digits; anything above is treated as a letter, whose
 * low three bits give its position in the a-f / A-F range. The callers in this
 * file only pass bytes that were already checked to be hexadecimal digits.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    return (uint8_t) ((value & 0x7) + 9);
}
8665
8671static inline uint32_t
8672escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
8673 uint32_t value = 0;
8674 for (size_t index = 0; index < length; index++) {
8675 if (index != 0) value <<= 4;
8676 value |= escape_hexadecimal_digit(string[index]);
8677 }
8678
8679 // Here we're going to verify that the value is actually a valid Unicode
8680 // codepoint and not a surrogate pair.
8681 if (value >= 0xD800 && value <= 0xDFFF) {
8682 if (error_location != NULL) {
8683 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8684 } else {
8685 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8686 }
8687 return 0xFFFD;
8688 }
8689
8690 return value;
8691}
8692
8696static inline uint8_t
8697escape_byte(uint8_t value, const uint8_t flags) {
8698 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8699 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8700 return value;
8701}
8702
8706static inline void
8707escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8708 // \u escape sequences in string-like structures implicitly change the
8709 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8710 // literal.
8711 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8712 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8713 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8714 }
8715
8717 }
8718
8719 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8720 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8721 pm_buffer_append_byte(buffer, 0xEF);
8722 pm_buffer_append_byte(buffer, 0xBF);
8723 pm_buffer_append_byte(buffer, 0xBD);
8724 }
8725}
8726
8731static inline void
8732escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
8733 if (byte >= 0x80) {
8734 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8735 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8736 }
8737
8738 parser->explicit_encoding = parser->encoding;
8739 }
8740
8741 pm_buffer_append_byte(buffer, byte);
8742}
8743
8759static inline void
8760escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8761 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8762 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8763 }
8764
8765 escape_write_byte_encoded(parser, buffer, byte);
8766}
8767
8771static inline void
8772escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8773 size_t width;
8774 if (parser->encoding_changed) {
8775 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8776 } else {
8777 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8778 }
8779
8780 if (width == 1) {
8781 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8782 } else if (width > 1) {
8783 // Valid multibyte character. Just ignore escape.
8784 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8785 pm_buffer_append_bytes(b, parser->current.end, width);
8786 parser->current.end += width;
8787 } else {
8788 // Assume the next character wasn't meant to be part of this escape
8789 // sequence since it is invalid. Add an error and move on.
8790 parser->current.end++;
8791 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8792 }
8793}
8794
8800static void
8801escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8802#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8803
8804 PM_PARSER_WARN_TOKEN_FORMAT(
8805 parser,
8806 &parser->current,
8807 PM_WARN_INVALID_CHARACTER,
8808 FLAG(flags),
8809 FLAG(flag),
8810 type
8811 );
8812
8813#undef FLAG
8814}
8815
8819static void
8820escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8821 uint8_t peeked = peek(parser);
8822 switch (peeked) {
8823 case '\\': {
8824 parser->current.end++;
8825 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
8826 return;
8827 }
8828 case '\'': {
8829 parser->current.end++;
8830 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
8831 return;
8832 }
8833 case 'a': {
8834 parser->current.end++;
8835 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
8836 return;
8837 }
8838 case 'b': {
8839 parser->current.end++;
8840 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
8841 return;
8842 }
8843 case 'e': {
8844 parser->current.end++;
8845 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
8846 return;
8847 }
8848 case 'f': {
8849 parser->current.end++;
8850 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
8851 return;
8852 }
8853 case 'n': {
8854 parser->current.end++;
8855 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
8856 return;
8857 }
8858 case 'r': {
8859 parser->current.end++;
8860 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
8861 return;
8862 }
8863 case 's': {
8864 parser->current.end++;
8865 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
8866 return;
8867 }
8868 case 't': {
8869 parser->current.end++;
8870 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
8871 return;
8872 }
8873 case 'v': {
8874 parser->current.end++;
8875 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
8876 return;
8877 }
8878 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
8879 uint8_t value = (uint8_t) (*parser->current.end - '0');
8880 parser->current.end++;
8881
8882 if (pm_char_is_octal_digit(peek(parser))) {
8883 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8884 parser->current.end++;
8885
8886 if (pm_char_is_octal_digit(peek(parser))) {
8887 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
8888 parser->current.end++;
8889 }
8890 }
8891
8892 value = escape_byte(value, flags);
8893 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8894 return;
8895 }
8896 case 'x': {
8897 const uint8_t *start = parser->current.end - 1;
8898
8899 parser->current.end++;
8900 uint8_t byte = peek(parser);
8901
8902 if (pm_char_is_hexadecimal_digit(byte)) {
8903 uint8_t value = escape_hexadecimal_digit(byte);
8904 parser->current.end++;
8905
8906 byte = peek(parser);
8907 if (pm_char_is_hexadecimal_digit(byte)) {
8908 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
8909 parser->current.end++;
8910 }
8911
8912 value = escape_byte(value, flags);
8913 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8914 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8915 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
8916 } else {
8917 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8918 }
8919 }
8920
8921 escape_write_byte_encoded(parser, buffer, value);
8922 } else {
8923 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8924 }
8925
8926 return;
8927 }
8928 case 'u': {
8929 const uint8_t *start = parser->current.end - 1;
8930 parser->current.end++;
8931
8932 if (parser->current.end == parser->end) {
8933 const uint8_t *start = parser->current.end - 2;
8934 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8935 } else if (peek(parser) == '{') {
8936 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
8937 parser->current.end++;
8938
8939 size_t whitespace;
8940 while (true) {
8941 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
8942 parser->current.end += whitespace;
8943 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
8944 // This is super hacky, but it gets us nicer error
8945 // messages because we can still pass it off to the
8946 // regular expression engine even if we hit an
8947 // unterminated regular expression.
8948 parser->current.end += 2;
8949 } else {
8950 break;
8951 }
8952 }
8953
8954 const uint8_t *extra_codepoints_start = NULL;
8955 int codepoints_count = 0;
8956
8957 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
8958 const uint8_t *unicode_start = parser->current.end;
8959 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
8960
8961 if (hexadecimal_length > 6) {
8962 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
8963 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8964 } else if (hexadecimal_length == 0) {
8965 // there are not hexadecimal characters
8966
8967 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8968 // If this is a regular expression, we are going to
8969 // let the regular expression engine handle this
8970 // error instead of us.
8971 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
8972 } else {
8973 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
8974 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8975 }
8976
8977 return;
8978 }
8979
8980 parser->current.end += hexadecimal_length;
8981 codepoints_count++;
8982 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8983 extra_codepoints_start = unicode_start;
8984 }
8985
8986 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8987 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
8988
8989 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
8990 }
8991
8992 // ?\u{nnnn} character literal should contain only one codepoint
8993 // and cannot be like ?\u{nnnn mmmm}.
8994 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8995 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8996 }
8997
8998 if (parser->current.end == parser->end) {
8999 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9000 } else if (peek(parser) == '}') {
9001 parser->current.end++;
9002 } else {
9003 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9004 // If this is a regular expression, we are going to let
9005 // the regular expression engine handle this error
9006 // instead of us.
9007 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9008 } else {
9009 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9010 }
9011 }
9012
9013 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9014 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9015 }
9016 } else {
9017 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9018
9019 if (length == 0) {
9020 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9021 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9022 } else {
9023 const uint8_t *start = parser->current.end - 2;
9024 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9025 }
9026 } else if (length == 4) {
9027 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
9028
9029 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9030 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9031 }
9032
9033 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9034 parser->current.end += 4;
9035 } else {
9036 parser->current.end += length;
9037
9038 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9039 // If this is a regular expression, we are going to let
9040 // the regular expression engine handle this error
9041 // instead of us.
9042 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9043 } else {
9044 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9045 }
9046 }
9047 }
9048
9049 return;
9050 }
9051 case 'c': {
9052 parser->current.end++;
9053 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9054 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9055 }
9056
9057 if (parser->current.end == parser->end) {
9058 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9059 return;
9060 }
9061
9062 uint8_t peeked = peek(parser);
9063 switch (peeked) {
9064 case '?': {
9065 parser->current.end++;
9066 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9067 return;
9068 }
9069 case '\\':
9070 parser->current.end++;
9071
9072 if (match(parser, 'u') || match(parser, 'U')) {
9073 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9074 return;
9075 }
9076
9077 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9078 return;
9079 case ' ':
9080 parser->current.end++;
9081 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9082 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9083 return;
9084 case '\t':
9085 parser->current.end++;
9086 escape_read_warn(parser, flags, 0, "\\t");
9087 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9088 return;
9089 default: {
9090 if (!char_is_ascii_printable(peeked)) {
9091 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9092 return;
9093 }
9094
9095 parser->current.end++;
9096 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9097 return;
9098 }
9099 }
9100 }
9101 case 'C': {
9102 parser->current.end++;
9103 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9104 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9105 }
9106
9107 if (peek(parser) != '-') {
9108 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9109 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9110 return;
9111 }
9112
9113 parser->current.end++;
9114 if (parser->current.end == parser->end) {
9115 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9116 return;
9117 }
9118
9119 uint8_t peeked = peek(parser);
9120 switch (peeked) {
9121 case '?': {
9122 parser->current.end++;
9123 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9124 return;
9125 }
9126 case '\\':
9127 parser->current.end++;
9128
9129 if (match(parser, 'u') || match(parser, 'U')) {
9130 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9131 return;
9132 }
9133
9134 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9135 return;
9136 case ' ':
9137 parser->current.end++;
9138 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9139 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9140 return;
9141 case '\t':
9142 parser->current.end++;
9143 escape_read_warn(parser, flags, 0, "\\t");
9144 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9145 return;
9146 default: {
9147 if (!char_is_ascii_printable(peeked)) {
9148 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9149 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9150 return;
9151 }
9152
9153 parser->current.end++;
9154 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9155 return;
9156 }
9157 }
9158 }
9159 case 'M': {
9160 parser->current.end++;
9161 if (flags & PM_ESCAPE_FLAG_META) {
9162 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9163 }
9164
9165 if (peek(parser) != '-') {
9166 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9167 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9168 return;
9169 }
9170
9171 parser->current.end++;
9172 if (parser->current.end == parser->end) {
9173 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9174 return;
9175 }
9176
9177 uint8_t peeked = peek(parser);
9178 switch (peeked) {
9179 case '\\':
9180 parser->current.end++;
9181
9182 if (match(parser, 'u') || match(parser, 'U')) {
9183 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9184 return;
9185 }
9186
9187 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9188 return;
9189 case ' ':
9190 parser->current.end++;
9191 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9192 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9193 return;
9194 case '\t':
9195 parser->current.end++;
9196 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9197 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9198 return;
9199 default:
9200 if (!char_is_ascii_printable(peeked)) {
9201 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9202 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9203 return;
9204 }
9205
9206 parser->current.end++;
9207 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9208 return;
9209 }
9210 }
9211 case '\r': {
9212 if (peek_offset(parser, 1) == '\n') {
9213 parser->current.end += 2;
9214 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
9215 return;
9216 }
9218 }
9219 default: {
9220 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9221 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9222 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9223 return;
9224 }
9225 if (parser->current.end < parser->end) {
9226 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9227 } else {
9228 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9229 }
9230 return;
9231 }
9232 }
9233}
9234
9260static pm_token_type_t
9261lex_question_mark(pm_parser_t *parser) {
9262 if (lex_state_end_p(parser)) {
9263 lex_state_set(parser, PM_LEX_STATE_BEG);
9264 return PM_TOKEN_QUESTION_MARK;
9265 }
9266
9267 if (parser->current.end >= parser->end) {
9268 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9269 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9270 return PM_TOKEN_CHARACTER_LITERAL;
9271 }
9272
9273 if (pm_char_is_whitespace(*parser->current.end)) {
9274 lex_state_set(parser, PM_LEX_STATE_BEG);
9275 return PM_TOKEN_QUESTION_MARK;
9276 }
9277
9278 lex_state_set(parser, PM_LEX_STATE_BEG);
9279
9280 if (match(parser, '\\')) {
9281 lex_state_set(parser, PM_LEX_STATE_END);
9282
9283 pm_buffer_t buffer;
9284 pm_buffer_init_capacity(&buffer, 3);
9285
9286 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9287 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
9288
9289 return PM_TOKEN_CHARACTER_LITERAL;
9290 } else {
9291 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9292
9293 // Ternary operators can have a ? immediately followed by an identifier
9294 // which starts with an underscore. We check for this case here.
9295 if (
9296 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9297 (
9298 (parser->current.end + encoding_width >= parser->end) ||
9299 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9300 )
9301 ) {
9302 lex_state_set(parser, PM_LEX_STATE_END);
9303 parser->current.end += encoding_width;
9304 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9305 return PM_TOKEN_CHARACTER_LITERAL;
9306 }
9307 }
9308
9309 return PM_TOKEN_QUESTION_MARK;
9310}
9311
9316static pm_token_type_t
9317lex_at_variable(pm_parser_t *parser) {
9318 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9319 const uint8_t *end = parser->end;
9320
9321 size_t width;
9322 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9323 parser->current.end += width;
9324
9325 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9326 parser->current.end += width;
9327 }
9328 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9329 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9330 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9331 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9332 }
9333
9334 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9335 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9336 } else {
9337 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9338 pm_parser_err_token(parser, &parser->current, diag_id);
9339 }
9340
9341 // If we're lexing an embedded variable, then we need to pop back into the
9342 // parent lex context.
9343 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9344 lex_mode_pop(parser);
9345 }
9346
9347 return type;
9348}
9349
9353static inline void
9354parser_lex_callback(pm_parser_t *parser) {
9355 if (parser->lex_callback) {
9356 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
9357 }
9358}
9359
9363static inline pm_comment_t *
9364parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9365 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
9366 if (comment == NULL) return NULL;
9367
9368 *comment = (pm_comment_t) {
9369 .type = type,
9370 .location = TOK2LOC(parser, &parser->current)
9371 };
9372
9373 return comment;
9374}
9375
9381static pm_token_type_t
9382lex_embdoc(pm_parser_t *parser) {
9383 // First, lex out the EMBDOC_BEGIN token.
9384 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9385
9386 if (newline == NULL) {
9387 parser->current.end = parser->end;
9388 } else {
9389 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9390 parser->current.end = newline + 1;
9391 }
9392
9393 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9394 parser_lex_callback(parser);
9395
9396 // Now, create a comment that is going to be attached to the parser.
9397 const uint8_t *comment_start = parser->current.start;
9398 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9399 if (comment == NULL) return PM_TOKEN_EOF;
9400
9401 // Now, loop until we find the end of the embedded documentation or the end
9402 // of the file.
9403 while (parser->current.end + 4 <= parser->end) {
9404 parser->current.start = parser->current.end;
9405
9406 // If we've hit the end of the embedded documentation then we'll return
9407 // that token here.
9408 if (
9409 (memcmp(parser->current.end, "=end", 4) == 0) &&
9410 (
9411 (parser->current.end + 4 == parser->end) || // end of file
9412 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9413 (parser->current.end[4] == '\0') || // NUL or end of script
9414 (parser->current.end[4] == '\004') || // ^D
9415 (parser->current.end[4] == '\032') // ^Z
9416 )
9417 ) {
9418 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9419
9420 if (newline == NULL) {
9421 parser->current.end = parser->end;
9422 } else {
9423 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9424 parser->current.end = newline + 1;
9425 }
9426
9427 parser->current.type = PM_TOKEN_EMBDOC_END;
9428 parser_lex_callback(parser);
9429
9430 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9431 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9432
9433 return PM_TOKEN_EMBDOC_END;
9434 }
9435
9436 // Otherwise, we'll parse until the end of the line and return a line of
9437 // embedded documentation.
9438 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9439
9440 if (newline == NULL) {
9441 parser->current.end = parser->end;
9442 } else {
9443 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
9444 parser->current.end = newline + 1;
9445 }
9446
9447 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9448 parser_lex_callback(parser);
9449 }
9450
9451 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9452
9453 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9454 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9455
9456 return PM_TOKEN_EOF;
9457}
9458
9464static inline void
9465parser_lex_ignored_newline(pm_parser_t *parser) {
9466 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9467 parser_lex_callback(parser);
9468}
9469
9479static inline void
9480parser_flush_heredoc_end(pm_parser_t *parser) {
9481 assert(parser->heredoc_end <= parser->end);
9482 parser->next_start = parser->heredoc_end;
9483 parser->heredoc_end = NULL;
9484}
9485
9489static bool
9490parser_end_of_line_p(const pm_parser_t *parser) {
9491 const uint8_t *cursor = parser->current.end;
9492
9493 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9494 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9495 }
9496
9497 return true;
9498}
9499
9518typedef struct {
9524
9529 const uint8_t *cursor;
9531
9551
9555static inline void
9556pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9557 pm_buffer_append_byte(&token_buffer->buffer, byte);
9558}
9559
9560static inline void
9561pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9562 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9563}
9564
9568static inline size_t
9569parser_char_width(const pm_parser_t *parser) {
9570 size_t width;
9571 if (parser->encoding_changed) {
9572 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9573 } else {
9574 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9575 }
9576
9577 // TODO: If the character is invalid in the given encoding, then we'll just
9578 // push one byte into the buffer. This should actually be an error.
9579 return (width == 0 ? 1 : width);
9580}
9581
9585static void
9586pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9587 size_t width = parser_char_width(parser);
9588 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9589 parser->current.end += width;
9590}
9591
9592static void
9593pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9594 size_t width = parser_char_width(parser);
9595 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
9596 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
9597 parser->current.end += width;
9598}
9599
/**
 * Returns true if none of the `length` bytes starting at `value` has its high
 * bit set. An empty slice is ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length > 0) {
        if (*value >= 0x80) {
            return false;
        }

        value++;
        length--;
    }

    return true;
}
9608
9615static inline void
9616pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9617 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
9618}
9619
9620static inline void
9621pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9622 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
9623 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
9624 pm_buffer_free(&token_buffer->regexp_buffer);
9625}
9626
9636static void
9637pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9638 if (token_buffer->cursor == NULL) {
9639 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9640 } else {
9641 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9642 pm_token_buffer_copy(parser, token_buffer);
9643 }
9644}
9645
9646static void
9647pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9648 if (token_buffer->base.cursor == NULL) {
9649 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9650 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
9651 } else {
9652 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9653 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
9654 pm_regexp_token_buffer_copy(parser, token_buffer);
9655 }
9656}
9657
9658#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9659
9668static void
9669pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9670 const uint8_t *start;
9671 if (token_buffer->cursor == NULL) {
9672 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9673 start = parser->current.start;
9674 } else {
9675 start = token_buffer->cursor;
9676 }
9677
9678 const uint8_t *end = parser->current.end - 1;
9679 assert(end >= start);
9680 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9681
9682 token_buffer->cursor = end;
9683}
9684
9685static void
9686pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9687 const uint8_t *start;
9688 if (token_buffer->base.cursor == NULL) {
9689 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9690 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9691 start = parser->current.start;
9692 } else {
9693 start = token_buffer->base.cursor;
9694 }
9695
9696 const uint8_t *end = parser->current.end - 1;
9697 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9698 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9699
9700 token_buffer->base.cursor = end;
9701}
9702
9703#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9704
9709static inline size_t
9710pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9711 size_t whitespace = 0;
9712
9713 switch (indent) {
9714 case PM_HEREDOC_INDENT_NONE:
9715 // Do nothing, we can't match a terminator with
9716 // indentation and there's no need to calculate common
9717 // whitespace.
9718 break;
9719 case PM_HEREDOC_INDENT_DASH:
9720 // Skip past inline whitespace.
9721 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9722 break;
9723 case PM_HEREDOC_INDENT_TILDE:
9724 // Skip past inline whitespace and calculate common
9725 // whitespace.
9726 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9727 if (**cursor == '\t') {
9728 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9729 } else {
9730 whitespace++;
9731 }
9732 (*cursor)++;
9733 }
9734
9735 break;
9736 }
9737
9738 return whitespace;
9739}
9740
9745static uint8_t
9746pm_lex_percent_delimiter(pm_parser_t *parser) {
9747 size_t eol_length = match_eol(parser);
9748
9749 if (eol_length) {
9750 if (parser->heredoc_end) {
9751 // If we have already lexed a heredoc, then the newline has already
9752 // been added to the list. In this case we want to just flush the
9753 // heredoc end.
9754 parser_flush_heredoc_end(parser);
9755 } else {
9756 // Otherwise, we'll add the newline to the list of newlines.
9757 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9758 }
9759
9760 uint8_t delimiter = *parser->current.end;
9761
9762 // If our delimiter is \r\n, we want to treat it as if it's \n.
9763 // For example, %\r\nfoo\r\n should be "foo"
9764 if (eol_length == 2) {
9765 delimiter = *(parser->current.end + 1);
9766 }
9767
9768 parser->current.end += eol_length;
9769 return delimiter;
9770 }
9771
9772 return *parser->current.end++;
9773}
9774
/**
 * Convenience macro for the lexer: set the type of the current token, invoke
 * the lex callback, and return from the enclosing function.
 *
 * Note that this expands to several statements ending in a bare `return`, so
 * it must be used as a complete statement (followed by a semicolon) inside a
 * braced block or case body of a void function that has `parser` in scope.
 */
#define LEX(token_type) parser->current.type = (token_type); parser_lex_callback(parser); return
9780
9787static void
9788parser_lex(pm_parser_t *parser) {
9789 assert(parser->current.end <= parser->end);
9790 parser->previous = parser->current;
9791
9792 // This value mirrors cmd_state from CRuby.
9793 bool previous_command_start = parser->command_start;
9794 parser->command_start = false;
9795
9796 // This is used to communicate to the newline lexing function that we've
9797 // already seen a comment.
9798 bool lexed_comment = false;
9799
9800 // Here we cache the current value of the semantic token seen flag. This is
9801 // used to reset it in case we find a token that shouldn't flip this flag.
9802 unsigned int semantic_token_seen = parser->semantic_token_seen;
9803 parser->semantic_token_seen = true;
9804
9805 // We'll jump to this label when we are about to encounter an EOF.
9806 // If we still have lex_modes on the stack, we pop them so that cleanup
9807 // can happen. For example, we should still continue parsing after a heredoc
9808 // identifier, even if the heredoc body was syntax invalid.
9809 switch_lex_modes:
9810
9811 switch (parser->lex_modes.current->mode) {
9812 case PM_LEX_DEFAULT:
9813 case PM_LEX_EMBEXPR:
9814 case PM_LEX_EMBVAR:
9815
9816 // We have a specific named label here because we are going to jump back to
9817 // this location in the event that we have lexed a token that should not be
9818 // returned to the parser. This includes comments, ignored newlines, and
9819 // invalid tokens of some form.
9820 lex_next_token: {
9821 // If we have the special next_start pointer set, then we're going to jump
9822 // to that location and start lexing from there.
9823 if (parser->next_start != NULL) {
9824 parser->current.end = parser->next_start;
9825 parser->next_start = NULL;
9826 }
9827
9828 // This value mirrors space_seen from CRuby. It tracks whether or not
9829 // space has been eaten before the start of the next token.
9830 bool space_seen = false;
9831
9832 // First, we're going to skip past any whitespace at the front of the next
9833 // token.
9834 bool chomping = true;
9835 while (parser->current.end < parser->end && chomping) {
9836 switch (*parser->current.end) {
9837 case ' ':
9838 case '\t':
9839 case '\f':
9840 case '\v':
9841 parser->current.end++;
9842 space_seen = true;
9843 break;
9844 case '\r':
9845 if (match_eol_offset(parser, 1)) {
9846 chomping = false;
9847 } else {
9848 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9849 parser->current.end++;
9850 space_seen = true;
9851 }
9852 break;
9853 case '\\': {
9854 size_t eol_length = match_eol_offset(parser, 1);
9855 if (eol_length) {
9856 if (parser->heredoc_end) {
9857 parser->current.end = parser->heredoc_end;
9858 parser->heredoc_end = NULL;
9859 } else {
9860 parser->current.end += eol_length + 1;
9861 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9862 space_seen = true;
9863 }
9864 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
9865 parser->current.end += 2;
9866 } else {
9867 chomping = false;
9868 }
9869
9870 break;
9871 }
9872 default:
9873 chomping = false;
9874 break;
9875 }
9876 }
9877
9878 // Next, we'll set the start of this token to be the current end.
9879 parser->current.start = parser->current.end;
9880
9881 // We'll check if we're at the end of the file. If we are, then we
9882 // need to return the EOF token.
9883 if (parser->current.end >= parser->end) {
9884 // We may be missing closing tokens. We should pop modes one by one
9885 // to do the appropriate cleanup like moving next_start for heredocs.
9886 // Only when no mode is remaining will we actually emit the EOF token.
9887 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
9888 lex_mode_pop(parser);
9889 goto switch_lex_modes;
9890 }
9891
9892 // If we hit EOF, but the EOF came immediately after a newline,
9893 // set the start of the token to the newline. This way any EOF
9894 // errors will be reported as happening on that line rather than
9895 // a line after. For example "foo(\n" should report an error
9896 // on line 1 even though EOF technically occurs on line 2.
9897 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
9898 parser->current.start -= 1;
9899 }
9900 LEX(PM_TOKEN_EOF);
9901 }
9902
9903 // Finally, we'll check the current character to determine the next
9904 // token.
9905 switch (*parser->current.end++) {
9906 case '\0': // NUL or end of script
9907 case '\004': // ^D
9908 case '\032': // ^Z
9909 parser->current.end--;
9910 LEX(PM_TOKEN_EOF);
9911
9912 case '#': { // comments
9913 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
9914 parser->current.end = ending == NULL ? parser->end : ending;
9915
9916 // If we found a comment while lexing, then we're going to
9917 // add it to the list of comments in the file and keep
9918 // lexing.
9919 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9920 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9921
9922 if (ending) parser->current.end++;
9923 parser->current.type = PM_TOKEN_COMMENT;
9924 parser_lex_callback(parser);
9925
9926 // Here, parse the comment to see if it's a magic comment
9927 // and potentially change state on the parser.
9928 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
9929 ptrdiff_t length = parser->current.end - parser->current.start;
9930
9931 // If we didn't find a magic comment within the first
9932 // pass and we're at the start of the file, then we need
9933 // to do another pass to potentially find other patterns
9934 // for encoding comments.
9935 if (length >= 10 && !parser->encoding_locked) {
9936 parser_lex_magic_comment_encoding(parser);
9937 }
9938 }
9939
9940 lexed_comment = true;
9941 }
9943 case '\r':
9944 case '\n': {
9945 parser->semantic_token_seen = semantic_token_seen & 0x1;
9946 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
9947
9948 if (eol_length) {
9949 // The only way you can have carriage returns in this
9950 // particular loop is if you have a carriage return
9951 // followed by a newline. In that case we'll just skip
9952 // over the carriage return and continue lexing, in
9953 // order to make it so that the newline token
9954 // encapsulates both the carriage return and the
9955 // newline. Note that we need to check that we haven't
9956 // already lexed a comment here because that falls
9957 // through into here as well.
9958 if (!lexed_comment) {
9959 parser->current.end += eol_length - 1; // skip CR
9960 }
9961
9962 if (parser->heredoc_end == NULL) {
9963 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
9964 }
9965 }
9966
9967 if (parser->heredoc_end) {
9968 parser_flush_heredoc_end(parser);
9969 }
9970
9971 // If this is an ignored newline, then we can continue lexing after
9972 // calling the callback with the ignored newline token.
9973 switch (lex_state_ignored_p(parser)) {
9974 case PM_IGNORED_NEWLINE_NONE:
9975 break;
9976 case PM_IGNORED_NEWLINE_PATTERN:
9977 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
9978 if (!lexed_comment) parser_lex_ignored_newline(parser);
9979 lex_state_set(parser, PM_LEX_STATE_BEG);
9980 parser->command_start = true;
9981 parser->current.type = PM_TOKEN_NEWLINE;
9982 return;
9983 }
9985 case PM_IGNORED_NEWLINE_ALL:
9986 if (!lexed_comment) parser_lex_ignored_newline(parser);
9987 lexed_comment = false;
9988 goto lex_next_token;
9989 }
9990
9991 // Here we need to look ahead and see if there is a call operator
9992 // (either . or &.) that starts the next line. If there is, then this
9993 // is going to become an ignored newline and we're going to instead
9994 // return the call operator.
9995 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
9996 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
9997
9998 if (next_content < parser->end) {
9999 // If we hit a comment after a newline, then we're going to check
10000 // if it's ignored or if it's followed by a method call ('.').
10001 // If it is, then we're going to call the
10002 // callback with an ignored newline and then continue lexing.
10003 // Otherwise we'll return a regular newline.
10004 if (next_content[0] == '#') {
10005 // Here we look for a "." or "&." following a "\n".
10006 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10007
10008 while (following && (following + 1 < parser->end)) {
10009 following++;
10010 following += pm_strspn_inline_whitespace(following, parser->end - following);
10011
10012 // If this is not followed by a comment, then we can break out
10013 // of this loop.
10014 if (peek_at(parser, following) != '#') break;
10015
10016 // If there is a comment, then we need to find the end of the
10017 // comment and continue searching from there.
10018 following = next_newline(following, parser->end - following);
10019 }
10020
10021 // If the lex state was ignored, we will lex the
10022 // ignored newline.
10023 if (lex_state_ignored_p(parser)) {
10024 if (!lexed_comment) parser_lex_ignored_newline(parser);
10025 lexed_comment = false;
10026 goto lex_next_token;
10027 }
10028
10029 // If we hit a '.' or a '&.' we will lex the ignored
10030 // newline.
10031 if (following && (
10032 (peek_at(parser, following) == '.') ||
10033 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10034 )) {
10035 if (!lexed_comment) parser_lex_ignored_newline(parser);
10036 lexed_comment = false;
10037 goto lex_next_token;
10038 }
10039
10040
10041 // If we are parsing as CRuby 4.0 or later and we
10042 // hit a '&&' or a '||' then we will lex the ignored
10043 // newline.
10044 if (
10046 following && (
10047 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10048 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10049 (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
10050 (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
10051 )
10052 ) {
10053 if (!lexed_comment) parser_lex_ignored_newline(parser);
10054 lexed_comment = false;
10055 goto lex_next_token;
10056 }
10057 }
10058
10059 // If we hit a . after a newline, then we're in a call chain and
10060 // we need to return the call operator.
10061 if (next_content[0] == '.') {
10062 // To match ripper, we need to emit an ignored newline even though
10063 // it's a real newline in the case that we have a beginless range
10064 // on a subsequent line.
10065 if (peek_at(parser, next_content + 1) == '.') {
10066 if (!lexed_comment) parser_lex_ignored_newline(parser);
10067 lex_state_set(parser, PM_LEX_STATE_BEG);
10068 parser->command_start = true;
10069 parser->current.type = PM_TOKEN_NEWLINE;
10070 return;
10071 }
10072
10073 if (!lexed_comment) parser_lex_ignored_newline(parser);
10074 lex_state_set(parser, PM_LEX_STATE_DOT);
10075 parser->current.start = next_content;
10076 parser->current.end = next_content + 1;
10077 parser->next_start = NULL;
10078 LEX(PM_TOKEN_DOT);
10079 }
10080
10081 // If we hit a &. after a newline, then we're in a call chain and
10082 // we need to return the call operator.
10083 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10084 if (!lexed_comment) parser_lex_ignored_newline(parser);
10085 lex_state_set(parser, PM_LEX_STATE_DOT);
10086 parser->current.start = next_content;
10087 parser->current.end = next_content + 2;
10088 parser->next_start = NULL;
10089 LEX(PM_TOKEN_AMPERSAND_DOT);
10090 }
10091
10092 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10093 // If we hit an && then we are in a logical chain
10094 // and we need to return the logical operator.
10095 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10096 if (!lexed_comment) parser_lex_ignored_newline(parser);
10097 lex_state_set(parser, PM_LEX_STATE_BEG);
10098 parser->current.start = next_content;
10099 parser->current.end = next_content + 2;
10100 parser->next_start = NULL;
10101 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10102 }
10103
10104 // If we hit a || then we are in a logical chain and
10105 // we need to return the logical operator.
10106 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10107 if (!lexed_comment) parser_lex_ignored_newline(parser);
10108 lex_state_set(parser, PM_LEX_STATE_BEG);
10109 parser->current.start = next_content;
10110 parser->current.end = next_content + 2;
10111 parser->next_start = NULL;
10112 LEX(PM_TOKEN_PIPE_PIPE);
10113 }
10114
10115 // If we hit an 'and' then we are in a logical chain
10116 // and we need to return the logical operator.
10117 if (
10118 peek_at(parser, next_content) == 'a' &&
10119 peek_at(parser, next_content + 1) == 'n' &&
10120 peek_at(parser, next_content + 2) == 'd' &&
10121 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10122 ) {
10123 if (!lexed_comment) parser_lex_ignored_newline(parser);
10124 lex_state_set(parser, PM_LEX_STATE_BEG);
10125 parser->current.start = next_content;
10126 parser->current.end = next_content + 3;
10127 parser->next_start = NULL;
10128 parser->command_start = true;
10129 LEX(PM_TOKEN_KEYWORD_AND);
10130 }
10131
10132 // If we hit an 'or' then we are in a logical chain
10133 // and we need to return the logical operator.
10134 if (
10135 peek_at(parser, next_content) == 'o' &&
10136 peek_at(parser, next_content + 1) == 'r' &&
10137 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10138 ) {
10139 if (!lexed_comment) parser_lex_ignored_newline(parser);
10140 lex_state_set(parser, PM_LEX_STATE_BEG);
10141 parser->current.start = next_content;
10142 parser->current.end = next_content + 2;
10143 parser->next_start = NULL;
10144 parser->command_start = true;
10145 LEX(PM_TOKEN_KEYWORD_OR);
10146 }
10147 }
10148 }
10149
10150 // At this point we know this is a regular newline, and we can set the
10151 // necessary state and return the token.
10152 lex_state_set(parser, PM_LEX_STATE_BEG);
10153 parser->command_start = true;
10154 parser->current.type = PM_TOKEN_NEWLINE;
10155 if (!lexed_comment) parser_lex_callback(parser);
10156 return;
10157 }
10158
10159 // ,
10160 case ',':
10161 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10162 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10163 }
10164
10165 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10166 LEX(PM_TOKEN_COMMA);
10167
10168 // (
10169 case '(': {
10170 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10171
10172 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10173 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10174 }
10175
10176 parser->enclosure_nesting++;
10177 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10178 pm_do_loop_stack_push(parser, false);
10179 LEX(type);
10180 }
10181
10182 // )
10183 case ')':
10184 parser->enclosure_nesting--;
10185 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10186 pm_do_loop_stack_pop(parser);
10187 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10188
10189 // ;
10190 case ';':
10191 lex_state_set(parser, PM_LEX_STATE_BEG);
10192 parser->command_start = true;
10193 LEX(PM_TOKEN_SEMICOLON);
10194
10195 // [ [] []=
10196 case '[':
10197 parser->enclosure_nesting++;
10198 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10199
10200 if (lex_state_operator_p(parser)) {
10201 if (match(parser, ']')) {
10202 parser->enclosure_nesting--;
10203 lex_state_set(parser, PM_LEX_STATE_ARG);
10204 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10205 }
10206
10207 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10208 LEX(type);
10209 }
10210
10211 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10212 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10213 }
10214
10215 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10216 pm_do_loop_stack_push(parser, false);
10217 LEX(type);
10218
10219 // ]
10220 case ']':
10221 parser->enclosure_nesting--;
10222 lex_state_set(parser, PM_LEX_STATE_END);
10223 pm_do_loop_stack_pop(parser);
10224 LEX(PM_TOKEN_BRACKET_RIGHT);
10225
10226 // {
10227 case '{': {
10228 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10229
10230 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10231 // This { begins a lambda
10232 parser->command_start = true;
10233 lex_state_set(parser, PM_LEX_STATE_BEG);
10234 type = PM_TOKEN_LAMBDA_BEGIN;
10235 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10236 // This { begins a hash literal
10237 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10238 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10239 // This { begins a block
10240 parser->command_start = true;
10241 lex_state_set(parser, PM_LEX_STATE_BEG);
10242 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10243 // This { begins a block on a command
10244 parser->command_start = true;
10245 lex_state_set(parser, PM_LEX_STATE_BEG);
10246 } else {
10247 // This { begins a hash literal
10248 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10249 }
10250
10251 parser->enclosure_nesting++;
10252 parser->brace_nesting++;
10253 pm_do_loop_stack_push(parser, false);
10254
10255 LEX(type);
10256 }
10257
10258 // }
10259 case '}':
10260 parser->enclosure_nesting--;
10261 pm_do_loop_stack_pop(parser);
10262
10263 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10264 lex_mode_pop(parser);
10265 LEX(PM_TOKEN_EMBEXPR_END);
10266 }
10267
10268 parser->brace_nesting--;
10269 lex_state_set(parser, PM_LEX_STATE_END);
10270 LEX(PM_TOKEN_BRACE_RIGHT);
10271
10272 // * ** **= *=
10273 case '*': {
10274 if (match(parser, '*')) {
10275 if (match(parser, '=')) {
10276 lex_state_set(parser, PM_LEX_STATE_BEG);
10277 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10278 }
10279
10280 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10281
10282 if (lex_state_spcarg_p(parser, space_seen)) {
10283 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10284 type = PM_TOKEN_USTAR_STAR;
10285 } else if (lex_state_beg_p(parser)) {
10286 type = PM_TOKEN_USTAR_STAR;
10287 } else if (ambiguous_operator_p(parser, space_seen)) {
10288 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10289 }
10290
10291 if (lex_state_operator_p(parser)) {
10292 lex_state_set(parser, PM_LEX_STATE_ARG);
10293 } else {
10294 lex_state_set(parser, PM_LEX_STATE_BEG);
10295 }
10296
10297 LEX(type);
10298 }
10299
10300 if (match(parser, '=')) {
10301 lex_state_set(parser, PM_LEX_STATE_BEG);
10302 LEX(PM_TOKEN_STAR_EQUAL);
10303 }
10304
10305 pm_token_type_t type = PM_TOKEN_STAR;
10306
10307 if (lex_state_spcarg_p(parser, space_seen)) {
10308 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10309 type = PM_TOKEN_USTAR;
10310 } else if (lex_state_beg_p(parser)) {
10311 type = PM_TOKEN_USTAR;
10312 } else if (ambiguous_operator_p(parser, space_seen)) {
10313 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10314 }
10315
10316 if (lex_state_operator_p(parser)) {
10317 lex_state_set(parser, PM_LEX_STATE_ARG);
10318 } else {
10319 lex_state_set(parser, PM_LEX_STATE_BEG);
10320 }
10321
10322 LEX(type);
10323 }
10324
10325 // ! != !~ !@
10326 case '!':
10327 if (lex_state_operator_p(parser)) {
10328 lex_state_set(parser, PM_LEX_STATE_ARG);
10329 if (match(parser, '@')) {
10330 LEX(PM_TOKEN_BANG);
10331 }
10332 } else {
10333 lex_state_set(parser, PM_LEX_STATE_BEG);
10334 }
10335
10336 if (match(parser, '=')) {
10337 LEX(PM_TOKEN_BANG_EQUAL);
10338 }
10339
10340 if (match(parser, '~')) {
10341 LEX(PM_TOKEN_BANG_TILDE);
10342 }
10343
10344 LEX(PM_TOKEN_BANG);
10345
10346 // = => =~ == === =begin
10347 case '=':
10348 if (
10349 current_token_starts_line(parser) &&
10350 (parser->current.end + 5 <= parser->end) &&
10351 memcmp(parser->current.end, "begin", 5) == 0 &&
10352 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10353 ) {
10354 pm_token_type_t type = lex_embdoc(parser);
10355 if (type == PM_TOKEN_EOF) {
10356 LEX(type);
10357 }
10358
10359 goto lex_next_token;
10360 }
10361
10362 if (lex_state_operator_p(parser)) {
10363 lex_state_set(parser, PM_LEX_STATE_ARG);
10364 } else {
10365 lex_state_set(parser, PM_LEX_STATE_BEG);
10366 }
10367
10368 if (match(parser, '>')) {
10369 LEX(PM_TOKEN_EQUAL_GREATER);
10370 }
10371
10372 if (match(parser, '~')) {
10373 LEX(PM_TOKEN_EQUAL_TILDE);
10374 }
10375
10376 if (match(parser, '=')) {
10377 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10378 }
10379
10380 LEX(PM_TOKEN_EQUAL);
10381
10382 // < << <<= <= <=>
10383 case '<':
10384 if (match(parser, '<')) {
10385 if (
10386 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10387 !lex_state_end_p(parser) &&
10388 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10389 ) {
10390 const uint8_t *end = parser->current.end;
10391
10392 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10393 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10394
10395 if (match(parser, '-')) {
10396 indent = PM_HEREDOC_INDENT_DASH;
10397 }
10398 else if (match(parser, '~')) {
10399 indent = PM_HEREDOC_INDENT_TILDE;
10400 }
10401
10402 if (match(parser, '`')) {
10403 quote = PM_HEREDOC_QUOTE_BACKTICK;
10404 }
10405 else if (match(parser, '"')) {
10406 quote = PM_HEREDOC_QUOTE_DOUBLE;
10407 }
10408 else if (match(parser, '\'')) {
10409 quote = PM_HEREDOC_QUOTE_SINGLE;
10410 }
10411
10412 const uint8_t *ident_start = parser->current.end;
10413 size_t width = 0;
10414
10415 if (parser->current.end >= parser->end) {
10416 parser->current.end = end;
10417 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10418 parser->current.end = end;
10419 } else {
10420 if (quote == PM_HEREDOC_QUOTE_NONE) {
10421 parser->current.end += width;
10422
10423 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10424 parser->current.end += width;
10425 }
10426 } else {
10427 // If we have quotes, then we're going to go until we find the
10428 // end quote.
10429 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10430 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10431 parser->current.end++;
10432 }
10433 }
10434
10435 size_t ident_length = (size_t) (parser->current.end - ident_start);
10436 bool ident_error = false;
10437
10438 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10439 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10440 ident_error = true;
10441 }
10442
10443 parser->explicit_encoding = NULL;
10444 lex_mode_push(parser, (pm_lex_mode_t) {
10445 .mode = PM_LEX_HEREDOC,
10446 .as.heredoc = {
10447 .base = {
10448 .ident_start = ident_start,
10449 .ident_length = ident_length,
10450 .quote = quote,
10451 .indent = indent
10452 },
10453 .next_start = parser->current.end,
10454 .common_whitespace = NULL,
10455 .line_continuation = false
10456 }
10457 });
10458
10459 if (parser->heredoc_end == NULL) {
10460 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10461
10462 if (body_start == NULL) {
10463 // If there is no newline after the heredoc identifier, then
10464 // this is not a valid heredoc declaration. In this case we
10465 // will add an error, but we will still return a heredoc
10466 // start.
10467 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10468 body_start = parser->end;
10469 } else {
10470 // Otherwise, we want to indicate that the body of the
10471 // heredoc starts on the character after the next newline.
10472 pm_line_offset_list_append(&parser->line_offsets, U32(body_start - parser->start + 1));
10473 body_start++;
10474 }
10475
10476 parser->next_start = body_start;
10477 } else {
10478 parser->next_start = parser->heredoc_end;
10479 }
10480
10481 LEX(PM_TOKEN_HEREDOC_START);
10482 }
10483 }
10484
10485 if (match(parser, '=')) {
10486 lex_state_set(parser, PM_LEX_STATE_BEG);
10487 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10488 }
10489
10490 if (ambiguous_operator_p(parser, space_seen)) {
10491 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10492 }
10493
10494 if (lex_state_operator_p(parser)) {
10495 lex_state_set(parser, PM_LEX_STATE_ARG);
10496 } else {
10497 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10498 lex_state_set(parser, PM_LEX_STATE_BEG);
10499 }
10500
10501 LEX(PM_TOKEN_LESS_LESS);
10502 }
10503
10504 if (lex_state_operator_p(parser)) {
10505 lex_state_set(parser, PM_LEX_STATE_ARG);
10506 } else {
10507 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10508 lex_state_set(parser, PM_LEX_STATE_BEG);
10509 }
10510
10511 if (match(parser, '=')) {
10512 if (match(parser, '>')) {
10513 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10514 }
10515
10516 LEX(PM_TOKEN_LESS_EQUAL);
10517 }
10518
10519 LEX(PM_TOKEN_LESS);
10520
10521 // > >> >>= >=
10522 case '>':
10523 if (match(parser, '>')) {
10524 if (lex_state_operator_p(parser)) {
10525 lex_state_set(parser, PM_LEX_STATE_ARG);
10526 } else {
10527 lex_state_set(parser, PM_LEX_STATE_BEG);
10528 }
10529 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10530 }
10531
10532 if (lex_state_operator_p(parser)) {
10533 lex_state_set(parser, PM_LEX_STATE_ARG);
10534 } else {
10535 lex_state_set(parser, PM_LEX_STATE_BEG);
10536 }
10537
10538 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10539
10540 // double-quoted string literal
10541 case '"': {
10542 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10543 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10544 LEX(PM_TOKEN_STRING_BEGIN);
10545 }
10546
10547 // xstring literal
10548 case '`': {
10549 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10550 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10551 LEX(PM_TOKEN_BACKTICK);
10552 }
10553
10554 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10555 if (previous_command_start) {
10556 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10557 } else {
10558 lex_state_set(parser, PM_LEX_STATE_ARG);
10559 }
10560
10561 LEX(PM_TOKEN_BACKTICK);
10562 }
10563
10564 lex_mode_push_string(parser, true, false, '\0', '`');
10565 LEX(PM_TOKEN_BACKTICK);
10566 }
10567
10568 // single-quoted string literal
10569 case '\'': {
10570 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10571 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10572 LEX(PM_TOKEN_STRING_BEGIN);
10573 }
10574
10575 // ? character literal
10576 case '?':
10577 LEX(lex_question_mark(parser));
10578
10579 // & && &&= &=
10580 case '&': {
10581 if (match(parser, '&')) {
10582 lex_state_set(parser, PM_LEX_STATE_BEG);
10583
10584 if (match(parser, '=')) {
10585 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10586 }
10587
10588 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10589 }
10590
10591 if (match(parser, '=')) {
10592 lex_state_set(parser, PM_LEX_STATE_BEG);
10593 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10594 }
10595
10596 if (match(parser, '.')) {
10597 lex_state_set(parser, PM_LEX_STATE_DOT);
10598 LEX(PM_TOKEN_AMPERSAND_DOT);
10599 }
10600
10601 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10602 if (lex_state_spcarg_p(parser, space_seen)) {
10603 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10604 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10605 } else {
10606 const uint8_t delim = peek_offset(parser, 1);
10607
10608 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10609 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10610 }
10611 }
10612
10613 type = PM_TOKEN_UAMPERSAND;
10614 } else if (lex_state_beg_p(parser)) {
10615 type = PM_TOKEN_UAMPERSAND;
10616 } else if (ambiguous_operator_p(parser, space_seen)) {
10617 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10618 }
10619
10620 if (lex_state_operator_p(parser)) {
10621 lex_state_set(parser, PM_LEX_STATE_ARG);
10622 } else {
10623 lex_state_set(parser, PM_LEX_STATE_BEG);
10624 }
10625
10626 LEX(type);
10627 }
10628
10629 // | || ||= |=
10630 case '|':
10631 if (match(parser, '|')) {
10632 if (match(parser, '=')) {
10633 lex_state_set(parser, PM_LEX_STATE_BEG);
10634 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10635 }
10636
10637 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10638 parser->current.end--;
10639 LEX(PM_TOKEN_PIPE);
10640 }
10641
10642 lex_state_set(parser, PM_LEX_STATE_BEG);
10643 LEX(PM_TOKEN_PIPE_PIPE);
10644 }
10645
10646 if (match(parser, '=')) {
10647 lex_state_set(parser, PM_LEX_STATE_BEG);
10648 LEX(PM_TOKEN_PIPE_EQUAL);
10649 }
10650
10651 if (lex_state_operator_p(parser)) {
10652 lex_state_set(parser, PM_LEX_STATE_ARG);
10653 } else {
10654 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10655 }
10656
10657 LEX(PM_TOKEN_PIPE);
10658
10659 // + += +@
10660 case '+': {
10661 if (lex_state_operator_p(parser)) {
10662 lex_state_set(parser, PM_LEX_STATE_ARG);
10663
10664 if (match(parser, '@')) {
10665 LEX(PM_TOKEN_UPLUS);
10666 }
10667
10668 LEX(PM_TOKEN_PLUS);
10669 }
10670
10671 if (match(parser, '=')) {
10672 lex_state_set(parser, PM_LEX_STATE_BEG);
10673 LEX(PM_TOKEN_PLUS_EQUAL);
10674 }
10675
10676 if (
10677 lex_state_beg_p(parser) ||
10678 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10679 ) {
10680 lex_state_set(parser, PM_LEX_STATE_BEG);
10681
10682 if (pm_char_is_decimal_digit(peek(parser))) {
10683 parser->current.end++;
10684 pm_token_type_t type = lex_numeric(parser);
10685 lex_state_set(parser, PM_LEX_STATE_END);
10686 LEX(type);
10687 }
10688
10689 LEX(PM_TOKEN_UPLUS);
10690 }
10691
10692 if (ambiguous_operator_p(parser, space_seen)) {
10693 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10694 }
10695
10696 lex_state_set(parser, PM_LEX_STATE_BEG);
10697 LEX(PM_TOKEN_PLUS);
10698 }
10699
10700 // - -= -@
10701 case '-': {
10702 if (lex_state_operator_p(parser)) {
10703 lex_state_set(parser, PM_LEX_STATE_ARG);
10704
10705 if (match(parser, '@')) {
10706 LEX(PM_TOKEN_UMINUS);
10707 }
10708
10709 LEX(PM_TOKEN_MINUS);
10710 }
10711
10712 if (match(parser, '=')) {
10713 lex_state_set(parser, PM_LEX_STATE_BEG);
10714 LEX(PM_TOKEN_MINUS_EQUAL);
10715 }
10716
10717 if (match(parser, '>')) {
10718 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10719 LEX(PM_TOKEN_MINUS_GREATER);
10720 }
10721
10722 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10723 bool is_beg = lex_state_beg_p(parser);
10724 if (!is_beg && spcarg) {
10725 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10726 }
10727
10728 if (is_beg || spcarg) {
10729 lex_state_set(parser, PM_LEX_STATE_BEG);
10730 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10731 }
10732
10733 if (ambiguous_operator_p(parser, space_seen)) {
10734 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10735 }
10736
10737 lex_state_set(parser, PM_LEX_STATE_BEG);
10738 LEX(PM_TOKEN_MINUS);
10739 }
10740
10741 // . .. ...
10742 case '.': {
10743 bool beg_p = lex_state_beg_p(parser);
10744
10745 if (match(parser, '.')) {
10746 if (match(parser, '.')) {
10747 // If we're _not_ inside a range within default parameters
10748 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10749 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10750 lex_state_set(parser, PM_LEX_STATE_BEG);
10751 } else {
10752 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10753 }
10754 LEX(PM_TOKEN_UDOT_DOT_DOT);
10755 }
10756
10757 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10758 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10759 }
10760
10761 lex_state_set(parser, PM_LEX_STATE_BEG);
10762 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10763 }
10764
10765 lex_state_set(parser, PM_LEX_STATE_BEG);
10766 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10767 }
10768
10769 lex_state_set(parser, PM_LEX_STATE_DOT);
10770 LEX(PM_TOKEN_DOT);
10771 }
10772
10773 // integer
10774 case '0':
10775 case '1':
10776 case '2':
10777 case '3':
10778 case '4':
10779 case '5':
10780 case '6':
10781 case '7':
10782 case '8':
10783 case '9': {
10784 pm_token_type_t type = lex_numeric(parser);
10785 lex_state_set(parser, PM_LEX_STATE_END);
10786 LEX(type);
10787 }
10788
10789 // :: symbol
10790 case ':':
10791 if (match(parser, ':')) {
10792 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10793 lex_state_set(parser, PM_LEX_STATE_BEG);
10794 LEX(PM_TOKEN_UCOLON_COLON);
10795 }
10796
10797 lex_state_set(parser, PM_LEX_STATE_DOT);
10798 LEX(PM_TOKEN_COLON_COLON);
10799 }
10800
10801 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
10802 lex_state_set(parser, PM_LEX_STATE_BEG);
10803 LEX(PM_TOKEN_COLON);
10804 }
10805
10806 if (peek(parser) == '"' || peek(parser) == '\'') {
10807 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
10808 parser->current.end++;
10809 }
10810
10811 lex_state_set(parser, PM_LEX_STATE_FNAME);
10812 LEX(PM_TOKEN_SYMBOL_BEGIN);
10813
10814 // / /=
10815 case '/':
10816 if (lex_state_beg_p(parser)) {
10817 lex_mode_push_regexp(parser, '\0', '/');
10818 LEX(PM_TOKEN_REGEXP_BEGIN);
10819 }
10820
10821 if (match(parser, '=')) {
10822 lex_state_set(parser, PM_LEX_STATE_BEG);
10823 LEX(PM_TOKEN_SLASH_EQUAL);
10824 }
10825
10826 if (lex_state_spcarg_p(parser, space_seen)) {
10827 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
10828 lex_mode_push_regexp(parser, '\0', '/');
10829 LEX(PM_TOKEN_REGEXP_BEGIN);
10830 }
10831
10832 if (ambiguous_operator_p(parser, space_seen)) {
10833 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
10834 }
10835
10836 if (lex_state_operator_p(parser)) {
10837 lex_state_set(parser, PM_LEX_STATE_ARG);
10838 } else {
10839 lex_state_set(parser, PM_LEX_STATE_BEG);
10840 }
10841
10842 LEX(PM_TOKEN_SLASH);
10843
10844 // ^ ^=
10845 case '^':
10846 if (lex_state_operator_p(parser)) {
10847 lex_state_set(parser, PM_LEX_STATE_ARG);
10848 } else {
10849 lex_state_set(parser, PM_LEX_STATE_BEG);
10850 }
10851 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10852
10853 // ~ ~@
10854 case '~':
10855 if (lex_state_operator_p(parser)) {
10856 (void) match(parser, '@');
10857 lex_state_set(parser, PM_LEX_STATE_ARG);
10858 } else {
10859 lex_state_set(parser, PM_LEX_STATE_BEG);
10860 }
10861
10862 LEX(PM_TOKEN_TILDE);
10863
10864 // % %= %i %I %q %Q %w %W
10865 case '%': {
10866 // If there is no subsequent character then we have an
10867 // invalid token. We're going to say it's the percent
10868 // operator because we don't want to move into the string
10869 // lex mode unnecessarily.
10870 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
10871 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10872 LEX(PM_TOKEN_PERCENT);
10873 }
10874
10875 if (!lex_state_beg_p(parser) && match(parser, '=')) {
10876 lex_state_set(parser, PM_LEX_STATE_BEG);
10877 LEX(PM_TOKEN_PERCENT_EQUAL);
10878 } else if (
10879 lex_state_beg_p(parser) ||
10880 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
10881 lex_state_spcarg_p(parser, space_seen)
10882 ) {
10883 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
10884 if (*parser->current.end >= 0x80) {
10885 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10886 }
10887
10888 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10889 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10890 LEX(PM_TOKEN_STRING_BEGIN);
10891 }
10892
10893 // Delimiters for %-literals cannot be alphanumeric. We
10894 // validate that here.
10895 uint8_t delimiter = peek_offset(parser, 1);
10896 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
10897 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10898 goto lex_next_token;
10899 }
10900
10901 switch (peek(parser)) {
10902 case 'i': {
10903 parser->current.end++;
10904
10905 if (parser->current.end < parser->end) {
10906 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10907 } else {
10908 lex_mode_push_list_eof(parser);
10909 }
10910
10911 LEX(PM_TOKEN_PERCENT_LOWER_I);
10912 }
10913 case 'I': {
10914 parser->current.end++;
10915
10916 if (parser->current.end < parser->end) {
10917 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10918 } else {
10919 lex_mode_push_list_eof(parser);
10920 }
10921
10922 LEX(PM_TOKEN_PERCENT_UPPER_I);
10923 }
10924 case 'r': {
10925 parser->current.end++;
10926
10927 if (parser->current.end < parser->end) {
10928 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10929 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10930 } else {
10931 lex_mode_push_regexp(parser, '\0', '\0');
10932 }
10933
10934 LEX(PM_TOKEN_REGEXP_BEGIN);
10935 }
10936 case 'q': {
10937 parser->current.end++;
10938
10939 if (parser->current.end < parser->end) {
10940 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10941 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10942 } else {
10943 lex_mode_push_string_eof(parser);
10944 }
10945
10946 LEX(PM_TOKEN_STRING_BEGIN);
10947 }
10948 case 'Q': {
10949 parser->current.end++;
10950
10951 if (parser->current.end < parser->end) {
10952 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10953 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10954 } else {
10955 lex_mode_push_string_eof(parser);
10956 }
10957
10958 LEX(PM_TOKEN_STRING_BEGIN);
10959 }
10960 case 's': {
10961 parser->current.end++;
10962
10963 if (parser->current.end < parser->end) {
10964 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10965 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10966 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10967 } else {
10968 lex_mode_push_string_eof(parser);
10969 }
10970
10971 LEX(PM_TOKEN_SYMBOL_BEGIN);
10972 }
10973 case 'w': {
10974 parser->current.end++;
10975
10976 if (parser->current.end < parser->end) {
10977 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
10978 } else {
10979 lex_mode_push_list_eof(parser);
10980 }
10981
10982 LEX(PM_TOKEN_PERCENT_LOWER_W);
10983 }
10984 case 'W': {
10985 parser->current.end++;
10986
10987 if (parser->current.end < parser->end) {
10988 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
10989 } else {
10990 lex_mode_push_list_eof(parser);
10991 }
10992
10993 LEX(PM_TOKEN_PERCENT_UPPER_W);
10994 }
10995 case 'x': {
10996 parser->current.end++;
10997
10998 if (parser->current.end < parser->end) {
10999 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11000 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11001 } else {
11002 lex_mode_push_string_eof(parser);
11003 }
11004
11005 LEX(PM_TOKEN_PERCENT_LOWER_X);
11006 }
11007 default:
11008 // If we get to this point, then we have a % that is completely
11009 // unparsable. In this case we'll just drop it from the parser
11010 // and skip past it and hope that the next token is something
11011 // that we can parse.
11012 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11013 goto lex_next_token;
11014 }
11015 }
11016
11017 if (ambiguous_operator_p(parser, space_seen)) {
11018 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11019 }
11020
11021 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11022 LEX(PM_TOKEN_PERCENT);
11023 }
11024
11025 // global variable
11026 case '$': {
11027 pm_token_type_t type = lex_global_variable(parser);
11028
11029 // If we're lexing an embedded variable, then we need to pop back into
11030 // the parent lex context.
11031 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11032 lex_mode_pop(parser);
11033 }
11034
11035 lex_state_set(parser, PM_LEX_STATE_END);
11036 LEX(type);
11037 }
11038
11039 // instance variable, class variable
11040 case '@':
11041 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11042 LEX(lex_at_variable(parser));
11043
11044 default: {
11045 if (*parser->current.start != '_') {
11046 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11047
11048 // If this isn't the beginning of an identifier, then
11049 // it's an invalid token as we've exhausted all of the
11050 // other options. We'll skip past it and return the next
11051 // token after adding an appropriate error message.
11052 if (!width) {
11053 if (*parser->current.start >= 0x80) {
11054 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11055 } else if (*parser->current.start == '\\') {
11056 switch (peek_at(parser, parser->current.start + 1)) {
11057 case ' ':
11058 parser->current.end++;
11059 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11060 break;
11061 case '\f':
11062 parser->current.end++;
11063 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11064 break;
11065 case '\t':
11066 parser->current.end++;
11067 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11068 break;
11069 case '\v':
11070 parser->current.end++;
11071 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11072 break;
11073 case '\r':
11074 if (peek_at(parser, parser->current.start + 2) != '\n') {
11075 parser->current.end++;
11076 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11077 break;
11078 }
11080 default:
11081 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11082 break;
11083 }
11084 } else if (char_is_ascii_printable(*parser->current.start)) {
11085 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11086 } else {
11087 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11088 }
11089
11090 goto lex_next_token;
11091 }
11092
11093 parser->current.end = parser->current.start + width;
11094 }
11095
11096 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11097
11098 // If we've hit a __END__ and it was at the start of the
11099 // line or the start of the file and it is followed by
11100 // either a \n or a \r\n, then this is the last token of the
11101 // file.
11102 if (
11103 ((parser->current.end - parser->current.start) == 7) &&
11104 current_token_starts_line(parser) &&
11105 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11106 (parser->current.end == parser->end || match_eol(parser))
11107 ) {
11108 // Since we know we're about to add an __END__ comment,
11109 // we know we need to add all of the newlines to get the
11110 // correct column information for it.
11111 const uint8_t *cursor = parser->current.end;
11112 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11113 pm_line_offset_list_append(&parser->line_offsets, U32(++cursor - parser->start));
11114 }
11115
11116 parser->current.end = parser->end;
11117 parser->current.type = PM_TOKEN___END__;
11118 parser_lex_callback(parser);
11119
11120 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
11121 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
11122
11123 LEX(PM_TOKEN_EOF);
11124 }
11125
11126 pm_lex_state_t last_state = parser->lex_state;
11127
11128 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11129 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11130 if (previous_command_start) {
11131 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11132 } else {
11133 lex_state_set(parser, PM_LEX_STATE_ARG);
11134 }
11135 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11136 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11137 } else {
11138 lex_state_set(parser, PM_LEX_STATE_END);
11139 }
11140 }
11141
11142 if (
11143 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11144 (type == PM_TOKEN_IDENTIFIER) &&
11145 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11146 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
11147 ) {
11148 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11149 }
11150
11151 LEX(type);
11152 }
11153 }
11154 }
11155 case PM_LEX_LIST: {
11156 if (parser->next_start != NULL) {
11157 parser->current.end = parser->next_start;
11158 parser->next_start = NULL;
11159 }
11160
11161 // First we'll set the beginning of the token.
11162 parser->current.start = parser->current.end;
11163
11164 // If there's any whitespace at the start of the list, then we're
11165 // going to trim it off the beginning and create a new token.
11166 size_t whitespace;
11167
11168 if (parser->heredoc_end) {
11169 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11170 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11171 whitespace += 1;
11172 }
11173 } else {
11174 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11175 }
11176
11177 if (whitespace > 0) {
11178 parser->current.end += whitespace;
11179 if (peek_offset(parser, -1) == '\n') {
11180 // mutates next_start
11181 parser_flush_heredoc_end(parser);
11182 }
11183 LEX(PM_TOKEN_WORDS_SEP);
11184 }
11185
11186 // We'll check if we're at the end of the file. If we are, then we
11187 // need to return the EOF token.
11188 if (parser->current.end >= parser->end) {
11189 LEX(PM_TOKEN_EOF);
11190 }
11191
11192 // Here we'll get a list of the places where strpbrk should break,
11193 // and then find the first one.
11194 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11195 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11196 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11197
11198 // If we haven't found an escape yet, then this buffer will be
11199 // unallocated since we can refer directly to the source string.
11200 pm_token_buffer_t token_buffer = { 0 };
11201
11202 while (breakpoint != NULL) {
11203 // If we hit whitespace, then we must have received content by
11204 // now, so we can return an element of the list.
11205 if (pm_char_is_whitespace(*breakpoint)) {
11206 parser->current.end = breakpoint;
11207 pm_token_buffer_flush(parser, &token_buffer);
11208 LEX(PM_TOKEN_STRING_CONTENT);
11209 }
11210
11211 // If we hit the terminator, we need to check which token to
11212 // return.
11213 if (*breakpoint == lex_mode->as.list.terminator) {
11214 // If this terminator doesn't actually close the list, then
11215 // we need to continue on past it.
11216 if (lex_mode->as.list.nesting > 0) {
11217 parser->current.end = breakpoint + 1;
11218 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11219 lex_mode->as.list.nesting--;
11220 continue;
11221 }
11222
11223 // If we've hit the terminator and we've already skipped
11224 // past content, then we can return a list node.
11225 if (breakpoint > parser->current.start) {
11226 parser->current.end = breakpoint;
11227 pm_token_buffer_flush(parser, &token_buffer);
11228 LEX(PM_TOKEN_STRING_CONTENT);
11229 }
11230
11231 // Otherwise, switch back to the default state and return
11232 // the end of the list.
11233 parser->current.end = breakpoint + 1;
11234 lex_mode_pop(parser);
11235 lex_state_set(parser, PM_LEX_STATE_END);
11236 LEX(PM_TOKEN_STRING_END);
11237 }
11238
11239 // If we hit a null byte, skip directly past it.
11240 if (*breakpoint == '\0') {
11241 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11242 continue;
11243 }
11244
11245 // If we hit escapes, then we need to treat the next token
11246 // literally. In this case we'll skip past the next character
11247 // and find the next breakpoint.
11248 if (*breakpoint == '\\') {
11249 parser->current.end = breakpoint + 1;
11250
11251 // If we've hit the end of the file, then break out of the
11252 // loop by setting the breakpoint to NULL.
11253 if (parser->current.end == parser->end) {
11254 breakpoint = NULL;
11255 continue;
11256 }
11257
11258 pm_token_buffer_escape(parser, &token_buffer);
11259 uint8_t peeked = peek(parser);
11260
11261 switch (peeked) {
11262 case ' ':
11263 case '\f':
11264 case '\t':
11265 case '\v':
11266 case '\\':
11267 pm_token_buffer_push_byte(&token_buffer, peeked);
11268 parser->current.end++;
11269 break;
11270 case '\r':
11271 parser->current.end++;
11272 if (peek(parser) != '\n') {
11273 pm_token_buffer_push_byte(&token_buffer, '\r');
11274 break;
11275 }
11277 case '\n':
11278 pm_token_buffer_push_byte(&token_buffer, '\n');
11279
11280 if (parser->heredoc_end) {
11281 // ... if we are on the same line as a heredoc,
11282 // flush the heredoc and continue parsing after
11283 // heredoc_end.
11284 parser_flush_heredoc_end(parser);
11285 pm_token_buffer_copy(parser, &token_buffer);
11286 LEX(PM_TOKEN_STRING_CONTENT);
11287 } else {
11288 // ... else track the newline.
11289 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11290 }
11291
11292 parser->current.end++;
11293 break;
11294 default:
11295 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11296 pm_token_buffer_push_byte(&token_buffer, peeked);
11297 parser->current.end++;
11298 } else if (lex_mode->as.list.interpolation) {
11299 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11300 } else {
11301 pm_token_buffer_push_byte(&token_buffer, '\\');
11302 pm_token_buffer_push_escaped(&token_buffer, parser);
11303 }
11304
11305 break;
11306 }
11307
11308 token_buffer.cursor = parser->current.end;
11309 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11310 continue;
11311 }
11312
11313 // If we hit a #, then we will attempt to lex interpolation.
11314 if (*breakpoint == '#') {
11315 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11316
11317 if (!type) {
11318 // If we haven't returned at this point then we had something
11319 // that looked like an interpolated class or instance variable
11320 // like "#@" but wasn't actually. In this case we'll just skip
11321 // to the next breakpoint.
11322 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11323 continue;
11324 }
11325
11326 if (type == PM_TOKEN_STRING_CONTENT) {
11327 pm_token_buffer_flush(parser, &token_buffer);
11328 }
11329
11330 LEX(type);
11331 }
11332
11333 // If we've hit the incrementor, then we need to skip past it
11334 // and find the next breakpoint.
11335 assert(*breakpoint == lex_mode->as.list.incrementor);
11336 parser->current.end = breakpoint + 1;
11337 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11338 lex_mode->as.list.nesting++;
11339 continue;
11340 }
11341
11342 if (parser->current.end > parser->current.start) {
11343 pm_token_buffer_flush(parser, &token_buffer);
11344 LEX(PM_TOKEN_STRING_CONTENT);
11345 }
11346
11347 // If we were unable to find a breakpoint, then this token hits the
11348 // end of the file.
11349 parser->current.end = parser->end;
11350 pm_token_buffer_flush(parser, &token_buffer);
11351 LEX(PM_TOKEN_STRING_CONTENT);
11352 }
11353 case PM_LEX_REGEXP: {
11354 // First, we'll set the start of this token to be the current end.
11355 if (parser->next_start == NULL) {
11356 parser->current.start = parser->current.end;
11357 } else {
11358 parser->current.start = parser->next_start;
11359 parser->current.end = parser->next_start;
11360 parser->next_start = NULL;
11361 }
11362
11363 // We'll check if we're at the end of the file. If we are, then we
11364 // need to return the EOF token.
11365 if (parser->current.end >= parser->end) {
11366 LEX(PM_TOKEN_EOF);
11367 }
11368
11369 // Get a reference to the current mode.
11370 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11371
11372 // These are the places where we need to split up the content of the
11373 // regular expression. We'll use strpbrk to find the first of these
11374 // characters.
11375 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11376 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11377 pm_regexp_token_buffer_t token_buffer = { 0 };
11378
11379 while (breakpoint != NULL) {
11380 uint8_t term = lex_mode->as.regexp.terminator;
11381 bool is_terminator = (*breakpoint == term);
11382
11383 // If the terminator is newline, we need to consider \r\n _also_ a newline
11384 // For example: `%\nfoo\r\n`
11385 // The string should be "foo", not "foo\r"
11386 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11387 if (term == '\n') {
11388 is_terminator = true;
11389 }
11390
11391 // If the terminator is a CR, but we see a CRLF, we need to
11392 // treat the CRLF as a newline, meaning this is _not_ the
11393 // terminator
11394 if (term == '\r') {
11395 is_terminator = false;
11396 }
11397 }
11398
11399 // If we hit the terminator, we need to determine what kind of
11400 // token to return.
11401 if (is_terminator) {
11402 if (lex_mode->as.regexp.nesting > 0) {
11403 parser->current.end = breakpoint + 1;
11404 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11405 lex_mode->as.regexp.nesting--;
11406 continue;
11407 }
11408
11409 // Here we've hit the terminator. If we have already consumed
11410 // content then we need to return that content as string content
11411 // first.
11412 if (breakpoint > parser->current.start) {
11413 parser->current.end = breakpoint;
11414 pm_regexp_token_buffer_flush(parser, &token_buffer);
11415 LEX(PM_TOKEN_STRING_CONTENT);
11416 }
11417
11418 // Check here if we need to track the newline.
11419 size_t eol_length = match_eol_at(parser, breakpoint);
11420 if (eol_length) {
11421 parser->current.end = breakpoint + eol_length;
11422
11423 // Track the newline if we're not in a heredoc that
11424 // would already have added the newline to the
11425 // list.
11426 if (parser->heredoc_end == NULL) {
11427 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11428 }
11429 } else {
11430 parser->current.end = breakpoint + 1;
11431 }
11432
11433 // Since we've hit the terminator of the regular expression,
11434 // we now need to parse the options.
11435 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11436
11437 lex_mode_pop(parser);
11438 lex_state_set(parser, PM_LEX_STATE_END);
11439 LEX(PM_TOKEN_REGEXP_END);
11440 }
11441
11442 // If we've hit the incrementor, then we need to skip past it
11443 // and find the next breakpoint.
11444 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11445 parser->current.end = breakpoint + 1;
11446 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11447 lex_mode->as.regexp.nesting++;
11448 continue;
11449 }
11450
11451 switch (*breakpoint) {
11452 case '\0':
11453 // If we hit a null byte, skip directly past it.
11454 parser->current.end = breakpoint + 1;
11455 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11456 break;
11457 case '\r':
11458 if (peek_at(parser, breakpoint + 1) != '\n') {
11459 parser->current.end = breakpoint + 1;
11460 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11461 break;
11462 }
11463
11464 breakpoint++;
11465 parser->current.end = breakpoint;
11466 pm_regexp_token_buffer_escape(parser, &token_buffer);
11467 token_buffer.base.cursor = breakpoint;
11468
11470 case '\n':
11471 // If we've hit a newline, then we need to track that in
11472 // the list of newlines.
11473 if (parser->heredoc_end == NULL) {
11474 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11475 parser->current.end = breakpoint + 1;
11476 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11477 break;
11478 }
11479
11480 parser->current.end = breakpoint + 1;
11481 parser_flush_heredoc_end(parser);
11482 pm_regexp_token_buffer_flush(parser, &token_buffer);
11483 LEX(PM_TOKEN_STRING_CONTENT);
11484 case '\\': {
11485 // If we hit escapes, then we need to treat the next
11486 // token literally. In this case we'll skip past the
11487 // next character and find the next breakpoint.
11488 parser->current.end = breakpoint + 1;
11489
11490 // If we've hit the end of the file, then break out of
11491 // the loop by setting the breakpoint to NULL.
11492 if (parser->current.end == parser->end) {
11493 breakpoint = NULL;
11494 break;
11495 }
11496
11497 pm_regexp_token_buffer_escape(parser, &token_buffer);
11498 uint8_t peeked = peek(parser);
11499
11500 switch (peeked) {
11501 case '\r':
11502 parser->current.end++;
11503 if (peek(parser) != '\n') {
11504 if (lex_mode->as.regexp.terminator != '\r') {
11505 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11506 }
11507 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11508 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11509 break;
11510 }
11512 case '\n':
11513 if (parser->heredoc_end) {
11514 // ... if we are on the same line as a heredoc,
11515 // flush the heredoc and continue parsing after
11516 // heredoc_end.
11517 parser_flush_heredoc_end(parser);
11518 pm_regexp_token_buffer_copy(parser, &token_buffer);
11519 LEX(PM_TOKEN_STRING_CONTENT);
11520 } else {
11521 // ... else track the newline.
11522 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11523 }
11524
11525 parser->current.end++;
11526 break;
11527 case 'c':
11528 case 'C':
11529 case 'M':
11530 case 'u':
11531 case 'x':
11532 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11533 break;
11534 default:
11535 if (lex_mode->as.regexp.terminator == peeked) {
11536 // Some characters when they are used as the
11537 // terminator also receive an escape. They are
11538 // enumerated here.
11539 switch (peeked) {
11540 case '$': case ')': case '*': case '+':
11541 case '.': case '>': case '?': case ']':
11542 case '^': case '|': case '}':
11543 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11544 break;
11545 default:
11546 break;
11547 }
11548
11549 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11550 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11551 parser->current.end++;
11552 break;
11553 }
11554
11555 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11556 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11557 break;
11558 }
11559
11560 token_buffer.base.cursor = parser->current.end;
11561 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11562 break;
11563 }
11564 case '#': {
11565 // If we hit a #, then we will attempt to lex
11566 // interpolation.
11567 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11568
11569 if (!type) {
11570 // If we haven't returned at this point then we had
11571 // something that looked like an interpolated class or
11572 // instance variable like "#@" but wasn't actually. In
11573 // this case we'll just skip to the next breakpoint.
11574 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11575 break;
11576 }
11577
11578 if (type == PM_TOKEN_STRING_CONTENT) {
11579 pm_regexp_token_buffer_flush(parser, &token_buffer);
11580 }
11581
11582 LEX(type);
11583 }
11584 default:
11585 assert(false && "unreachable");
11586 break;
11587 }
11588 }
11589
11590 if (parser->current.end > parser->current.start) {
11591 pm_regexp_token_buffer_flush(parser, &token_buffer);
11592 LEX(PM_TOKEN_STRING_CONTENT);
11593 }
11594
11595 // If we were unable to find a breakpoint, then this token hits the
11596 // end of the file.
11597 parser->current.end = parser->end;
11598 pm_regexp_token_buffer_flush(parser, &token_buffer);
11599 LEX(PM_TOKEN_STRING_CONTENT);
11600 }
11601 case PM_LEX_STRING: {
11602 // First, we'll set to start of this token to be the current end.
11603 if (parser->next_start == NULL) {
11604 parser->current.start = parser->current.end;
11605 } else {
11606 parser->current.start = parser->next_start;
11607 parser->current.end = parser->next_start;
11608 parser->next_start = NULL;
11609 }
11610
11611 // We'll check if we're at the end of the file. If we are, then we need to
11612 // return the EOF token.
11613 if (parser->current.end >= parser->end) {
11614 LEX(PM_TOKEN_EOF);
11615 }
11616
11617 // These are the places where we need to split up the content of the
11618 // string. We'll use strpbrk to find the first of these characters.
11619 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11620 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11621 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11622
11623 // If we haven't found an escape yet, then this buffer will be
11624 // unallocated since we can refer directly to the source string.
11625 pm_token_buffer_t token_buffer = { 0 };
11626
11627 while (breakpoint != NULL) {
11628 // If we hit the incrementor, then we'll increment then nesting and
11629 // continue lexing.
11630 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11631 lex_mode->as.string.nesting++;
11632 parser->current.end = breakpoint + 1;
11633 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11634 continue;
11635 }
11636
11637 uint8_t term = lex_mode->as.string.terminator;
11638 bool is_terminator = (*breakpoint == term);
11639
11640 // If the terminator is newline, we need to consider \r\n _also_ a newline
11641 // For example: `%r\nfoo\r\n`
11642 // The string should be /foo/, not /foo\r/
11643 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11644 if (term == '\n') {
11645 is_terminator = true;
11646 }
11647
11648 // If the terminator is a CR, but we see a CRLF, we need to
11649 // treat the CRLF as a newline, meaning this is _not_ the
11650 // terminator
11651 if (term == '\r') {
11652 is_terminator = false;
11653 }
11654 }
11655
11656 // Note that we have to check the terminator here first because we could
11657 // potentially be parsing a % string that has a # character as the
11658 // terminator.
11659 if (is_terminator) {
11660 // If this terminator doesn't actually close the string, then we need
11661 // to continue on past it.
11662 if (lex_mode->as.string.nesting > 0) {
11663 parser->current.end = breakpoint + 1;
11664 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11665 lex_mode->as.string.nesting--;
11666 continue;
11667 }
11668
11669 // Here we've hit the terminator. If we have already consumed content
11670 // then we need to return that content as string content first.
11671 if (breakpoint > parser->current.start) {
11672 parser->current.end = breakpoint;
11673 pm_token_buffer_flush(parser, &token_buffer);
11674 LEX(PM_TOKEN_STRING_CONTENT);
11675 }
11676
11677 // Otherwise we need to switch back to the parent lex mode and
11678 // return the end of the string.
11679 size_t eol_length = match_eol_at(parser, breakpoint);
11680 if (eol_length) {
11681 parser->current.end = breakpoint + eol_length;
11682
11683 // Track the newline if we're not in a heredoc that
11684 // would have already have added the newline to the
11685 // list.
11686 if (parser->heredoc_end == NULL) {
11687 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11688 }
11689 } else {
11690 parser->current.end = breakpoint + 1;
11691 }
11692
11693 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11694 parser->current.end++;
11695 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11696 lex_mode_pop(parser);
11697 LEX(PM_TOKEN_LABEL_END);
11698 }
11699
11700 // When the delimiter itself is a newline, we won't
11701 // get a chance to flush heredocs in the usual places since
11702 // the newline is already consumed.
11703 if (term == '\n' && parser->heredoc_end) {
11704 parser_flush_heredoc_end(parser);
11705 }
11706
11707 lex_state_set(parser, PM_LEX_STATE_END);
11708 lex_mode_pop(parser);
11709 LEX(PM_TOKEN_STRING_END);
11710 }
11711
11712 switch (*breakpoint) {
11713 case '\0':
11714 // Skip directly past the null character.
11715 parser->current.end = breakpoint + 1;
11716 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11717 break;
11718 case '\r':
11719 if (peek_at(parser, breakpoint + 1) != '\n') {
11720 parser->current.end = breakpoint + 1;
11721 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11722 break;
11723 }
11724
11725 // If we hit a \r\n sequence, then we need to treat it
11726 // as a newline.
11727 breakpoint++;
11728 parser->current.end = breakpoint;
11729 pm_token_buffer_escape(parser, &token_buffer);
11730 token_buffer.cursor = breakpoint;
11731
11733 case '\n':
11734 // When we hit a newline, we need to flush any potential
11735 // heredocs. Note that this has to happen after we check
11736 // for the terminator in case the terminator is a
11737 // newline character.
11738 if (parser->heredoc_end == NULL) {
11739 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11740 parser->current.end = breakpoint + 1;
11741 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11742 break;
11743 }
11744
11745 parser->current.end = breakpoint + 1;
11746 parser_flush_heredoc_end(parser);
11747 pm_token_buffer_flush(parser, &token_buffer);
11748 LEX(PM_TOKEN_STRING_CONTENT);
11749 case '\\': {
11750 // Here we hit escapes.
11751 parser->current.end = breakpoint + 1;
11752
11753 // If we've hit the end of the file, then break out of
11754 // the loop by setting the breakpoint to NULL.
11755 if (parser->current.end == parser->end) {
11756 breakpoint = NULL;
11757 continue;
11758 }
11759
11760 pm_token_buffer_escape(parser, &token_buffer);
11761 uint8_t peeked = peek(parser);
11762
11763 switch (peeked) {
11764 case '\\':
11765 pm_token_buffer_push_byte(&token_buffer, '\\');
11766 parser->current.end++;
11767 break;
11768 case '\r':
11769 parser->current.end++;
11770 if (peek(parser) != '\n') {
11771 if (!lex_mode->as.string.interpolation) {
11772 pm_token_buffer_push_byte(&token_buffer, '\\');
11773 }
11774 pm_token_buffer_push_byte(&token_buffer, '\r');
11775 break;
11776 }
11778 case '\n':
11779 if (!lex_mode->as.string.interpolation) {
11780 pm_token_buffer_push_byte(&token_buffer, '\\');
11781 pm_token_buffer_push_byte(&token_buffer, '\n');
11782 }
11783
11784 if (parser->heredoc_end) {
11785 // ... if we are on the same line as a heredoc,
11786 // flush the heredoc and continue parsing after
11787 // heredoc_end.
11788 parser_flush_heredoc_end(parser);
11789 pm_token_buffer_copy(parser, &token_buffer);
11790 LEX(PM_TOKEN_STRING_CONTENT);
11791 } else {
11792 // ... else track the newline.
11793 pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11794 }
11795
11796 parser->current.end++;
11797 break;
11798 default:
11799 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
11800 pm_token_buffer_push_byte(&token_buffer, peeked);
11801 parser->current.end++;
11802 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
11803 pm_token_buffer_push_byte(&token_buffer, peeked);
11804 parser->current.end++;
11805 } else if (lex_mode->as.string.interpolation) {
11806 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11807 } else {
11808 pm_token_buffer_push_byte(&token_buffer, '\\');
11809 pm_token_buffer_push_escaped(&token_buffer, parser);
11810 }
11811
11812 break;
11813 }
11814
11815 token_buffer.cursor = parser->current.end;
11816 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11817 break;
11818 }
11819 case '#': {
11820 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11821
11822 if (!type) {
11823 // If we haven't returned at this point then we had something that
11824 // looked like an interpolated class or instance variable like "#@"
11825 // but wasn't actually. In this case we'll just skip to the next
11826 // breakpoint.
11827 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11828 break;
11829 }
11830
11831 if (type == PM_TOKEN_STRING_CONTENT) {
11832 pm_token_buffer_flush(parser, &token_buffer);
11833 }
11834
11835 LEX(type);
11836 }
11837 default:
11838 assert(false && "unreachable");
11839 }
11840 }
11841
11842 if (parser->current.end > parser->current.start) {
11843 pm_token_buffer_flush(parser, &token_buffer);
11844 LEX(PM_TOKEN_STRING_CONTENT);
11845 }
11846
11847 // If we've hit the end of the string, then this is an unterminated
11848 // string. In that case we'll return a string content token.
11849 parser->current.end = parser->end;
11850 pm_token_buffer_flush(parser, &token_buffer);
11851 LEX(PM_TOKEN_STRING_CONTENT);
11852 }
11853 case PM_LEX_HEREDOC: {
11854 // First, we'll set to start of this token.
11855 if (parser->next_start == NULL) {
11856 parser->current.start = parser->current.end;
11857 } else {
11858 parser->current.start = parser->next_start;
11859 parser->current.end = parser->next_start;
11860 parser->heredoc_end = NULL;
11861 parser->next_start = NULL;
11862 }
11863
11864 // Now let's grab the information about the identifier off of the
11865 // current lex mode.
11866 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11867 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
11868
11869 bool line_continuation = lex_mode->as.heredoc.line_continuation;
11870 lex_mode->as.heredoc.line_continuation = false;
11871
11872 // We'll check if we're at the end of the file. If we are, then we
11873 // will add an error (because we weren't able to find the
11874 // terminator) but still continue parsing so that content after the
11875 // declaration of the heredoc can be parsed.
11876 if (parser->current.end >= parser->end) {
11877 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
11878 parser->next_start = lex_mode->as.heredoc.next_start;
11879 parser->heredoc_end = parser->current.end;
11880 lex_state_set(parser, PM_LEX_STATE_END);
11881 lex_mode_pop(parser);
11882 LEX(PM_TOKEN_HEREDOC_END);
11883 }
11884
11885 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
11886 size_t ident_length = heredoc_lex_mode->ident_length;
11887
11888 // If we are immediately following a newline and we have hit the
11889 // terminator, then we need to return the ending of the heredoc.
11890 if (current_token_starts_line(parser)) {
11891 const uint8_t *start = parser->current.start;
11892
11893 if (!line_continuation && (start + ident_length <= parser->end)) {
11894 const uint8_t *newline = next_newline(start, parser->end - start);
11895 const uint8_t *ident_end = newline;
11896 const uint8_t *terminator_end = newline;
11897
11898 if (newline == NULL) {
11899 terminator_end = parser->end;
11900 ident_end = parser->end;
11901 } else {
11902 terminator_end++;
11903 if (newline[-1] == '\r') {
11904 ident_end--; // Remove \r
11905 }
11906 }
11907
11908 const uint8_t *terminator_start = ident_end - ident_length;
11909 const uint8_t *cursor = start;
11910
11911 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
11912 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11913 cursor++;
11914 }
11915 }
11916
11917 if (
11918 (cursor == terminator_start) &&
11919 (memcmp(terminator_start, ident_start, ident_length) == 0)
11920 ) {
11921 if (newline != NULL) {
11922 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
11923 }
11924
11925 parser->current.end = terminator_end;
11926 if (*lex_mode->as.heredoc.next_start == '\\') {
11927 parser->next_start = NULL;
11928 } else {
11929 parser->next_start = lex_mode->as.heredoc.next_start;
11930 parser->heredoc_end = parser->current.end;
11931 }
11932
11933 lex_state_set(parser, PM_LEX_STATE_END);
11934 lex_mode_pop(parser);
11935 LEX(PM_TOKEN_HEREDOC_END);
11936 }
11937 }
11938
11939 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
11940 if (
11941 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
11942 lex_mode->as.heredoc.common_whitespace != NULL &&
11943 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
11944 peek_at(parser, start) != '\n'
11945 ) {
11946 *lex_mode->as.heredoc.common_whitespace = whitespace;
11947 }
11948 }
11949
11950 // Otherwise we'll be parsing string content. These are the places
11951 // where we need to split up the content of the heredoc. We'll use
11952 // strpbrk to find the first of these characters.
11953 uint8_t breakpoints[] = "\r\n\\#";
11954
11955 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
11956 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11957 breakpoints[3] = '\0';
11958 }
11959
11960 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11961 pm_token_buffer_t token_buffer = { 0 };
11962 bool was_line_continuation = false;
11963
11964 while (breakpoint != NULL) {
11965 switch (*breakpoint) {
11966 case '\0':
11967 // Skip directly past the null character.
11968 parser->current.end = breakpoint + 1;
11969 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11970 break;
11971 case '\r':
11972 parser->current.end = breakpoint + 1;
11973
11974 if (peek_at(parser, breakpoint + 1) != '\n') {
11975 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11976 break;
11977 }
11978
11979 // If we hit a \r\n sequence, then we want to replace it
11980 // with a single \n character in the final string.
11981 breakpoint++;
11982 pm_token_buffer_escape(parser, &token_buffer);
11983 token_buffer.cursor = breakpoint;
11984
11986 case '\n': {
11987 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
11988 parser_flush_heredoc_end(parser);
11989 parser->current.end = breakpoint + 1;
11990 pm_token_buffer_flush(parser, &token_buffer);
11991 LEX(PM_TOKEN_STRING_CONTENT);
11992 }
11993
11994 pm_line_offset_list_append(&parser->line_offsets, U32(breakpoint - parser->start + 1));
11995
11996 // If we have a - or ~ heredoc, then we can match after
11997 // some leading whitespace.
11998 const uint8_t *start = breakpoint + 1;
11999
12000 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12001 // We want to match the terminator starting from the end of the line in case
12002 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12003 const uint8_t *newline = next_newline(start, parser->end - start);
12004
12005 if (newline == NULL) {
12006 newline = parser->end;
12007 } else if (newline[-1] == '\r') {
12008 newline--; // Remove \r
12009 }
12010
12011 // Start of a possible terminator.
12012 const uint8_t *terminator_start = newline - ident_length;
12013
12014 // Cursor to check for the leading whitespace. We skip the
12015 // leading whitespace if we have a - or ~ heredoc.
12016 const uint8_t *cursor = start;
12017
12018 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12019 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12020 cursor++;
12021 }
12022 }
12023
12024 if (
12025 cursor == terminator_start &&
12026 (memcmp(terminator_start, ident_start, ident_length) == 0)
12027 ) {
12028 parser->current.end = breakpoint + 1;
12029 pm_token_buffer_flush(parser, &token_buffer);
12030 LEX(PM_TOKEN_STRING_CONTENT);
12031 }
12032 }
12033
12034 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12035
12036 // If we have hit a newline that is followed by a valid
12037 // terminator, then we need to return the content of the
12038 // heredoc here as string content. Then, the next time a
12039 // token is lexed, it will match again and return the
12040 // end of the heredoc.
12041 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12042 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12043 *lex_mode->as.heredoc.common_whitespace = whitespace;
12044 }
12045
12046 parser->current.end = breakpoint + 1;
12047 pm_token_buffer_flush(parser, &token_buffer);
12048 LEX(PM_TOKEN_STRING_CONTENT);
12049 }
12050
12051 // Otherwise we hit a newline and it wasn't followed by
12052 // a terminator, so we can continue parsing.
12053 parser->current.end = breakpoint + 1;
12054 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12055 break;
12056 }
12057 case '\\': {
12058 // If we hit an escape, then we need to skip past
12059 // however many characters the escape takes up. However
12060 // it's important that if \n or \r\n are escaped, we
12061 // stop looping before the newline and not after the
12062 // newline so that we can still potentially find the
12063 // terminator of the heredoc.
12064 parser->current.end = breakpoint + 1;
12065
12066 // If we've hit the end of the file, then break out of
12067 // the loop by setting the breakpoint to NULL.
12068 if (parser->current.end == parser->end) {
12069 breakpoint = NULL;
12070 continue;
12071 }
12072
12073 pm_token_buffer_escape(parser, &token_buffer);
12074 uint8_t peeked = peek(parser);
12075
12076 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12077 switch (peeked) {
12078 case '\r':
12079 parser->current.end++;
12080 if (peek(parser) != '\n') {
12081 pm_token_buffer_push_byte(&token_buffer, '\\');
12082 pm_token_buffer_push_byte(&token_buffer, '\r');
12083 break;
12084 }
12086 case '\n':
12087 pm_token_buffer_push_byte(&token_buffer, '\\');
12088 pm_token_buffer_push_byte(&token_buffer, '\n');
12089 token_buffer.cursor = parser->current.end + 1;
12090 breakpoint = parser->current.end;
12091 continue;
12092 default:
12093 pm_token_buffer_push_byte(&token_buffer, '\\');
12094 pm_token_buffer_push_escaped(&token_buffer, parser);
12095 break;
12096 }
12097 } else {
12098 switch (peeked) {
12099 case '\r':
12100 parser->current.end++;
12101 if (peek(parser) != '\n') {
12102 pm_token_buffer_push_byte(&token_buffer, '\r');
12103 break;
12104 }
12106 case '\n':
12107 // If we are in a tilde here, we should
12108 // break out of the loop and return the
12109 // string content.
12110 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12111 const uint8_t *end = parser->current.end;
12112
12113 if (parser->heredoc_end == NULL) {
12114 pm_line_offset_list_append(&parser->line_offsets, U32(end - parser->start + 1));
12115 }
12116
12117 // Here we want the buffer to only
12118 // include up to the backslash.
12119 parser->current.end = breakpoint;
12120 pm_token_buffer_flush(parser, &token_buffer);
12121
12122 // Now we can advance the end of the
12123 // token past the newline.
12124 parser->current.end = end + 1;
12125 lex_mode->as.heredoc.line_continuation = true;
12126 LEX(PM_TOKEN_STRING_CONTENT);
12127 }
12128
12129 was_line_continuation = true;
12130 token_buffer.cursor = parser->current.end + 1;
12131 breakpoint = parser->current.end;
12132 continue;
12133 default:
12134 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12135 break;
12136 }
12137 }
12138
12139 token_buffer.cursor = parser->current.end;
12140 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12141 break;
12142 }
12143 case '#': {
12144 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12145
12146 if (!type) {
12147 // If we haven't returned at this point then we had
12148 // something that looked like an interpolated class
12149 // or instance variable like "#@" but wasn't
12150 // actually. In this case we'll just skip to the
12151 // next breakpoint.
12152 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12153 break;
12154 }
12155
12156 if (type == PM_TOKEN_STRING_CONTENT) {
12157 pm_token_buffer_flush(parser, &token_buffer);
12158 }
12159
12160 LEX(type);
12161 }
12162 default:
12163 assert(false && "unreachable");
12164 }
12165
12166 was_line_continuation = false;
12167 }
12168
12169 if (parser->current.end > parser->current.start) {
12170 parser->current.end = parser->end;
12171 pm_token_buffer_flush(parser, &token_buffer);
12172 LEX(PM_TOKEN_STRING_CONTENT);
12173 }
12174
12175 // If we've hit the end of the string, then this is an unterminated
12176 // heredoc. In that case we'll return a string content token.
12177 parser->current.end = parser->end;
12178 pm_token_buffer_flush(parser, &token_buffer);
12179 LEX(PM_TOKEN_STRING_CONTENT);
12180 }
12181 }
12182
12183 assert(false && "unreachable");
12184}
12185
// LEX is only used by the lexer code above; undefine it here so that it is
// not visible to the parse functions that follow.
#undef LEX
12187
12188/******************************************************************************/
12189/* Parse functions */
12190/******************************************************************************/
12191
/**
 * Binding powers for the operator table that follows, listed in increasing
 * order. The trailing comment on each level names the operators it is meant
 * for.
 *
 * Consecutive levels are two apart. That leaves the odd value between two
 * levels unused, which the associativity macros below rely on when they
 * compute `precedence + 1` for the right-hand side of an operator.
 */
typedef enum {
    // Marks a token that cannot be used as an infix operator.
    PM_BINDING_POWER_UNSET            =  0,

    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4,  // rescue
    PM_BINDING_POWER_MODIFIER         =  6,  // if unless until while
    PM_BINDING_POWER_COMPOSITION      =  8,  // and or
    PM_BINDING_POWER_NOT              = 10,  // not
    PM_BINDING_POWER_MATCH            = 12,  // => in
    PM_BINDING_POWER_DEFINED          = 14,  // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,  // =
    PM_BINDING_POWER_ASSIGNMENT       = 18,  // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY          = 20,  // ?:
    PM_BINDING_POWER_RANGE            = 22,  // .. ...
    PM_BINDING_POWER_LOGICAL_OR       = 24,  // ||
    PM_BINDING_POWER_LOGICAL_AND      = 26,  // &&
    PM_BINDING_POWER_EQUALITY         = 28,  // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON       = 30,  // > >= < <=
    PM_BINDING_POWER_BITWISE_OR       = 32,  // | ^
    PM_BINDING_POWER_BITWISE_AND      = 34,  // &
    PM_BINDING_POWER_SHIFT            = 36,  // << >>
    PM_BINDING_POWER_TERM             = 38,  // + -
    PM_BINDING_POWER_FACTOR           = 40,  // * / %
    PM_BINDING_POWER_UMINUS           = 42,  // -@
    PM_BINDING_POWER_EXPONENT         = 44,  // **
    PM_BINDING_POWER_UNARY            = 46,  // ! ~ +@
    PM_BINDING_POWER_INDEX            = 48,  // [] []=
    PM_BINDING_POWER_CALL             = 50,  // :: .

    // One level above every real binding power.
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
12229
12234typedef struct {
12236 pm_binding_power_t left;
12237
12239 pm_binding_power_t right;
12240
12243
12250
// Shorthand positional initializers for entries of the binding power table
// below. The first two values are the left and right binding powers; the two
// booleans fill the members that follow them in pm_binding_powers_t.
//
// The macro argument is parenthesized at every use so that an expression
// argument is evaluated as a whole before `+ 1` is applied.

// Every assignment operator shares this entry.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Right power is one above the left power.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Left and right powers are equal.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Same powers as LEFT_ASSOCIATIVE, with the last flag set as well.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Same powers as RIGHT_ASSOCIATIVE, with both flags cleared.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12256
12257pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12258 // rescue
12259 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12260
12261 // if unless until while
12262 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12263 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12264 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12265 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12266
12267 // and or
12268 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12269 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12270
12271 // => in
12272 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12273 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12274
12275 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12276 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12277 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12278 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12279 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12280 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12281 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12282 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12283 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12284 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12285 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12286 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12287 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12288 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12289 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12290
12291 // ?:
12292 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12293
12294 // .. ...
12295 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12296 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12297 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12298 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12299
12300 // ||
12301 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12302
12303 // &&
12304 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12305
12306 // != !~ == === =~ <=>
12307 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12308 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12309 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12310 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12311 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12312 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12313
12314 // > >= < <=
12315 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12316 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12317 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12318 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12319
12320 // ^ |
12321 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12322 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12323
12324 // &
12325 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12326
12327 // >> <<
12328 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12329 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12330
12331 // - +
12332 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12333 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12334
12335 // % / *
12336 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12337 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12338 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12339 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12340
12341 // -@
12342 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12343 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12344
12345 // **
12346 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12347 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12348
12349 // ! ~ +@
12350 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12351 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12352 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12353
12354 // [
12355 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12356
12357 // :: . &.
12358 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12359 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12360 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12361};
12362
// The table-building helper macros are only meant to be used while
// initializing the binding power table, so undefine all of them here to keep
// them from leaking into the rest of the translation unit. NON_ASSOCIATIVE is
// used by the table entries for the range and equality operators and must be
// cleaned up alongside the others.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
12367
12371static inline bool
12372match1(const pm_parser_t *parser, pm_token_type_t type) {
12373 return parser->current.type == type;
12374}
12375
12379static inline bool
12380match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12381 return match1(parser, type1) || match1(parser, type2);
12382}
12383
12387static inline bool
12388match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12389 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12390}
12391
12395static inline bool
12396match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12397 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12398}
12399
12403static inline bool
12404match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12405 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12406}
12407
12411static inline bool
12412match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12413 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12414}
12415
12422static bool
12423accept1(pm_parser_t *parser, pm_token_type_t type) {
12424 if (match1(parser, type)) {
12425 parser_lex(parser);
12426 return true;
12427 }
12428 return false;
12429}
12430
12435static inline bool
12436accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12437 if (match2(parser, type1, type2)) {
12438 parser_lex(parser);
12439 return true;
12440 }
12441 return false;
12442}
12443
12455static void
12456expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12457 if (accept1(parser, type)) return;
12458
12459 const uint8_t *location = parser->previous.end;
12460 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12461
12462 parser->previous.start = location;
12463 parser->previous.type = 0;
12464}
12465
12470static void
12471expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12472 if (accept2(parser, type1, type2)) return;
12473
12474 const uint8_t *location = parser->previous.end;
12475 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12476
12477 parser->previous.start = location;
12478 parser->previous.type = 0;
12479}
12480
12485static void
12486expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12487 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12488 parser_lex(parser);
12489 } else {
12490 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12491 parser->previous.start = parser->previous.end;
12492 parser->previous.type = 0;
12493 }
12494}
12495
12502static void
12503expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12504 if (accept1(parser, type)) return;
12505
12506 const uint8_t *start = opening->start;
12507 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12508
12509 parser->previous.start = parser->previous.end;
12510 parser->previous.type = 0;
12511}
12512
12513static pm_node_t *
12514parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
12515
12520static pm_node_t *
12521parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
12522 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12523 pm_assert_value_expression(parser, node);
12524 return node;
12525}
12526
12545static inline bool
12546token_begins_expression_p(pm_token_type_t type) {
12547 switch (type) {
12548 case PM_TOKEN_EQUAL_GREATER:
12549 case PM_TOKEN_KEYWORD_IN:
12550 // We need to special case this because it is a binary operator that
12551 // should not be marked as beginning an expression.
12552 return false;
12553 case PM_TOKEN_BRACE_RIGHT:
12554 case PM_TOKEN_BRACKET_RIGHT:
12555 case PM_TOKEN_COLON:
12556 case PM_TOKEN_COMMA:
12557 case PM_TOKEN_EMBEXPR_END:
12558 case PM_TOKEN_EOF:
12559 case PM_TOKEN_LAMBDA_BEGIN:
12560 case PM_TOKEN_KEYWORD_DO:
12561 case PM_TOKEN_KEYWORD_DO_LOOP:
12562 case PM_TOKEN_KEYWORD_END:
12563 case PM_TOKEN_KEYWORD_ELSE:
12564 case PM_TOKEN_KEYWORD_ELSIF:
12565 case PM_TOKEN_KEYWORD_ENSURE:
12566 case PM_TOKEN_KEYWORD_THEN:
12567 case PM_TOKEN_KEYWORD_RESCUE:
12568 case PM_TOKEN_KEYWORD_WHEN:
12569 case PM_TOKEN_NEWLINE:
12570 case PM_TOKEN_PARENTHESIS_RIGHT:
12571 case PM_TOKEN_SEMICOLON:
12572 // The reason we need this short-circuit is because we're using the
12573 // binding powers table to tell us if the subsequent token could
12574 // potentially be the start of an expression. If there _is_ a binding
12575 // power for one of these tokens, then we should remove it from this list
12576 // and let it be handled by the default case below.
12577 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12578 return false;
12579 case PM_TOKEN_UAMPERSAND:
12580 // This is a special case because this unary operator cannot appear
12581 // as a general operator, it only appears in certain circumstances.
12582 return false;
12583 case PM_TOKEN_UCOLON_COLON:
12584 case PM_TOKEN_UMINUS:
12585 case PM_TOKEN_UMINUS_NUM:
12586 case PM_TOKEN_UPLUS:
12587 case PM_TOKEN_BANG:
12588 case PM_TOKEN_TILDE:
12589 case PM_TOKEN_UDOT_DOT:
12590 case PM_TOKEN_UDOT_DOT_DOT:
12591 // These unary tokens actually do have binding power associated with them
12592 // so that we can correctly place them into the precedence order. But we
12593 // want them to be marked as beginning an expression, so we need to
12594 // special case them here.
12595 return true;
12596 default:
12597 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12598 }
12599}
12600
12605static pm_node_t *
12606parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
12607 if (accept1(parser, PM_TOKEN_USTAR)) {
12608 pm_token_t operator = parser->previous;
12609 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12610 return UP(pm_splat_node_create(parser, &operator, expression));
12611 }
12612
12613 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
12614}
12615
12616static bool
12617pm_node_unreference_each(const pm_node_t *node, void *data) {
12618 switch (PM_NODE_TYPE(node)) {
12619 /* When we are about to destroy a set of nodes that could potentially
12620 * contain block exits for the current scope, we need to check if they
12621 * are contained in the list of block exits and remove them if they are.
12622 */
12623 case PM_BREAK_NODE:
12624 case PM_NEXT_NODE:
12625 case PM_REDO_NODE: {
12626 pm_parser_t *parser = (pm_parser_t *) data;
12627 size_t index = 0;
12628
12629 while (index < parser->current_block_exits->size) {
12630 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12631
12632 if (block_exit == node) {
12633 if (index + 1 < parser->current_block_exits->size) {
12634 memmove(
12635 &parser->current_block_exits->nodes[index],
12636 &parser->current_block_exits->nodes[index + 1],
12637 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12638 );
12639 }
12640 parser->current_block_exits->size--;
12641
12642 /* Note returning true here because these nodes could have
12643 * arguments that are themselves block exits. */
12644 return true;
12645 }
12646
12647 index++;
12648 }
12649
12650 return true;
12651 }
12652 /* When an implicit local variable is written to or targeted, it becomes
12653 * a regular, named local variable. This branch removes it from the list
12654 * of implicit parameters when that happens. */
12655 case PM_LOCAL_VARIABLE_READ_NODE:
12656 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12657 pm_parser_t *parser = (pm_parser_t *) data;
12658 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12659
12660 for (size_t index = 0; index < implicit_parameters->size; index++) {
12661 if (implicit_parameters->nodes[index] == node) {
12662 /* If the node is not the last one in the list, we need to
12663 * shift the remaining nodes down to fill the gap. This is
12664 * extremely unlikely to happen. */
12665 if (index != implicit_parameters->size - 1) {
12666 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12667 }
12668
12669 implicit_parameters->size--;
12670 break;
12671 }
12672 }
12673
12674 return false;
12675 }
12676 default:
12677 return true;
12678 }
12679}
12680
12686static void
12687pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12688 pm_visit_node(node, pm_node_unreference_each, parser);
12689}
12690
12695static void
12696parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12697 // The method name needs to change. If we previously had
12698 // foo, we now need foo=. In this case we'll allocate a new
12699 // owned string, copy the previous method name in, and
12700 // append an =.
12701 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12702 size_t length = constant->length;
12703 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
12704 if (name == NULL) return;
12705
12706 memcpy(name, constant->start, length);
12707 name[length] = '=';
12708
12709 // Now switch the name to the new string.
12710 // This silences clang analyzer warning about leak of memory pointed by `name`.
12711 // NOLINTNEXTLINE(clang-analyzer-*)
12712 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12713}
12714
12721static pm_node_t *
12722parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12723 switch (PM_NODE_TYPE(target)) {
12724 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12725 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12726 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12727 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12728 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12729 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12730 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12731 default: break;
12732 }
12733
12734 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12735 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12736
12737 pm_node_destroy(parser, target);
12738 return UP(result);
12739}
12740
12749static pm_node_t *
12750parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12751 switch (PM_NODE_TYPE(target)) {
12752 case PM_MISSING_NODE:
12753 return target;
12754 case PM_SOURCE_ENCODING_NODE:
12755 case PM_FALSE_NODE:
12756 case PM_SOURCE_FILE_NODE:
12757 case PM_SOURCE_LINE_NODE:
12758 case PM_NIL_NODE:
12759 case PM_SELF_NODE:
12760 case PM_TRUE_NODE: {
12761 // In these special cases, we have specific error messages and we
12762 // will replace them with local variable writes.
12763 return parse_unwriteable_target(parser, target);
12764 }
12765 case PM_CLASS_VARIABLE_READ_NODE:
12767 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12768 return target;
12769 case PM_CONSTANT_PATH_NODE:
12770 if (context_def_p(parser)) {
12771 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12772 }
12773
12775 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12776
12777 return target;
12778 case PM_CONSTANT_READ_NODE:
12779 if (context_def_p(parser)) {
12780 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12781 }
12782
12783 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12784 target->type = PM_CONSTANT_TARGET_NODE;
12785
12786 return target;
12787 case PM_BACK_REFERENCE_READ_NODE:
12788 case PM_NUMBERED_REFERENCE_READ_NODE:
12789 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12790 return target;
12791 case PM_GLOBAL_VARIABLE_READ_NODE:
12793 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
12794 return target;
12795 case PM_LOCAL_VARIABLE_READ_NODE: {
12796 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12797 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
12798 pm_node_unreference(parser, target);
12799 }
12800
12801 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
12802 uint32_t name = cast->name;
12803 uint32_t depth = cast->depth;
12804 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12805
12807 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
12808
12809 return target;
12810 }
12811 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12812 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12813 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
12814
12815 pm_node_unreference(parser, target);
12816 pm_node_destroy(parser, target);
12817
12818 return node;
12819 }
12820 case PM_INSTANCE_VARIABLE_READ_NODE:
12822 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
12823 return target;
12824 case PM_MULTI_TARGET_NODE:
12825 if (splat_parent) {
12826 // Multi target is not accepted in all positions. If this is one
12827 // of them, then we need to add an error.
12828 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12829 }
12830
12831 return target;
12832 case PM_SPLAT_NODE: {
12833 pm_splat_node_t *splat = (pm_splat_node_t *) target;
12834
12835 if (splat->expression != NULL) {
12836 splat->expression = parse_target(parser, splat->expression, multiple, true);
12837 }
12838
12839 return UP(splat);
12840 }
12841 case PM_CALL_NODE: {
12842 pm_call_node_t *call = (pm_call_node_t *) target;
12843
12844 // If we have no arguments to the call node and we need this to be a
12845 // target then this is either a method call or a local variable
12846 // write.
12847 if (
12848 (call->message_loc.length > 0) &&
12849 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
12850 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
12851 (call->opening_loc.length == 0) &&
12852 (call->arguments == NULL) &&
12853 (call->block == NULL)
12854 ) {
12855 if (call->receiver == NULL) {
12856 // When we get here, we have a local variable write, because it
12857 // was previously marked as a method call but now we have an =.
12858 // This looks like:
12859 //
12860 // foo = 1
12861 //
12862 // When it was parsed in the prefix position, foo was seen as a
12863 // method call with no receiver and no arguments. Now we have an
12864 // =, so we know it's a local variable write.
12865 pm_location_t message_loc = call->message_loc;
12866 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
12867 pm_node_destroy(parser, target);
12868
12869 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
12870 }
12871
12872 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
12873 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12874 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
12875 }
12876
12877 parse_write_name(parser, &call->name);
12878 return UP(pm_call_target_node_create(parser, call));
12879 }
12880 }
12881
12882 // If there is no call operator and the message is "[]" then this is
12883 // an aref expression, and we can transform it into an aset
12884 // expression.
12885 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12886 return UP(pm_index_target_node_create(parser, call));
12887 }
12888 }
12890 default:
12891 // In this case we have a node that we don't know how to convert
12892 // into a target. We need to treat it as an error. For now, we'll
12893 // mark it as an error and just skip right past it.
12894 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12895 return target;
12896 }
12897}
12898
12903static pm_node_t *
12904parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12905 pm_node_t *result = parse_target(parser, target, multiple, false);
12906
12907 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
12908 // parens after the targets.
12909 if (
12910 !match1(parser, PM_TOKEN_EQUAL) &&
12911 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
12912 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12913 ) {
12914 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
12915 }
12916
12917 return result;
12918}
12919
12924static pm_node_t *
12925parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
12926 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
12927
12928 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12929 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
12930 }
12931
12932 return write;
12933}
12934
12938static pm_node_t *
12939parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
12940 switch (PM_NODE_TYPE(target)) {
12941 case PM_MISSING_NODE:
12942 pm_node_destroy(parser, value);
12943 return target;
12944 case PM_CLASS_VARIABLE_READ_NODE: {
12945 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
12946 pm_node_destroy(parser, target);
12947 return UP(node);
12948 }
12949 case PM_CONSTANT_PATH_NODE: {
12950 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
12951
12952 if (context_def_p(parser)) {
12953 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12954 }
12955
12956 return parse_shareable_constant_write(parser, node);
12957 }
12958 case PM_CONSTANT_READ_NODE: {
12959 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
12960
12961 if (context_def_p(parser)) {
12962 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12963 }
12964
12965 pm_node_destroy(parser, target);
12966 return parse_shareable_constant_write(parser, node);
12967 }
12968 case PM_BACK_REFERENCE_READ_NODE:
12969 case PM_NUMBERED_REFERENCE_READ_NODE:
12970 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12972 case PM_GLOBAL_VARIABLE_READ_NODE: {
12973 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
12974 pm_node_destroy(parser, target);
12975 return UP(node);
12976 }
12977 case PM_LOCAL_VARIABLE_READ_NODE: {
12979
12980 pm_location_t location = target->location;
12981 pm_constant_id_t name = local_read->name;
12982 uint32_t depth = local_read->depth;
12983 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12984
12985 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12986 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12987 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
12988 pm_node_unreference(parser, target);
12989 }
12990
12991 pm_locals_unread(&scope->locals, name);
12992 pm_node_destroy(parser, target);
12993
12994 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
12995 }
12996 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12997 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
12998 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
12999
13000 pm_node_unreference(parser, target);
13001 pm_node_destroy(parser, target);
13002
13003 return node;
13004 }
13005 case PM_INSTANCE_VARIABLE_READ_NODE: {
13006 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
13007 pm_node_destroy(parser, target);
13008 return write_node;
13009 }
13010 case PM_MULTI_TARGET_NODE:
13011 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
13012 case PM_SPLAT_NODE: {
13013 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13014
13015 if (splat->expression != NULL) {
13016 splat->expression = parse_write(parser, splat->expression, operator, value);
13017 }
13018
13019 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13020 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
13021
13022 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
13023 }
13024 case PM_CALL_NODE: {
13025 pm_call_node_t *call = (pm_call_node_t *) target;
13026
13027 // If we have no arguments to the call node and we need this to be a
13028 // target then this is either a method call or a local variable
13029 // write.
13030 if (
13031 (call->message_loc.length > 0) &&
13032 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13033 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13034 (call->opening_loc.length == 0) &&
13035 (call->arguments == NULL) &&
13036 (call->block == NULL)
13037 ) {
13038 if (call->receiver == NULL) {
13039 // When we get here, we have a local variable write, because it
13040 // was previously marked as a method call but now we have an =.
13041 // This looks like:
13042 //
13043 // foo = 1
13044 //
13045 // When it was parsed in the prefix position, foo was seen as a
13046 // method call with no receiver and no arguments. Now we have an
13047 // =, so we know it's a local variable write.
13048 pm_location_t message_loc = call->message_loc;
13049
13050 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
13051 pm_parser_local_add_location(parser, &message_loc, 0);
13052 pm_node_destroy(parser, target);
13053
13054 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
13055 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
13056
13057 return target;
13058 }
13059
13060 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13061 // When we get here, we have a method call, because it was
13062 // previously marked as a method call but now we have an =. This
13063 // looks like:
13064 //
13065 // foo.bar = 1
13066 //
13067 // When it was parsed in the prefix position, foo.bar was seen as a
13068 // method call with no arguments. Now we have an =, so we know it's
13069 // a method call with an argument. In this case we will create the
13070 // arguments node, parse the argument, and add it to the list.
13071 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13072 call->arguments = arguments;
13073
13074 pm_arguments_node_arguments_append(arguments, value);
13075 PM_NODE_LENGTH_SET_NODE(call, arguments);
13076 call->equal_loc = TOK2LOC(parser, operator);
13077
13078 parse_write_name(parser, &call->name);
13079 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13080
13081 return UP(call);
13082 }
13083 }
13084
13085 // If there is no call operator and the message is "[]" then this is
13086 // an aref expression, and we can transform it into an aset
13087 // expression.
13088 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13089 if (call->arguments == NULL) {
13090 call->arguments = pm_arguments_node_create(parser);
13091 }
13092
13093 pm_arguments_node_arguments_append(call->arguments, value);
13094 PM_NODE_LENGTH_SET_NODE(target, value);
13095
13096 // Replace the name with "[]=".
13097 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13098 call->equal_loc = TOK2LOC(parser, operator);
13099
13100 // Ensure that the arguments for []= don't contain keywords
13101 pm_index_arguments_check(parser, call->arguments, call->block);
13102 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13103
13104 return target;
13105 }
13106
13107 // If there are arguments on the call node, then it can't be a
13108 // method call ending with = or a local variable write, so it must
13109 // be a syntax error. In this case we'll fall through to our default
13110 // handling. We need to free the value that we parsed because there
13111 // is no way for us to attach it to the tree at this point.
13112 //
13113 // Since it is possible for the value to contain an implicit
13114 // parameter somewhere in its subtree, we need to walk it and remove
13115 // any implicit parameters from the list of implicit parameters for
13116 // the current scope.
13117 pm_node_unreference(parser, value);
13118 pm_node_destroy(parser, value);
13119 }
13121 default:
13122 // In this case we have a node that we don't know how to convert into a
13123 // target. We need to treat it as an error. For now, we'll mark it as an
13124 // error and just skip right past it.
13125 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13126 return target;
13127 }
13128}
13129
13136static pm_node_t *
13137parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13138 switch (PM_NODE_TYPE(target)) {
13139 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13140 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13141 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13142 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13143 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13144 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13145 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13146 default: break;
13147 }
13148
13149 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
13150 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13151
13152 pm_node_destroy(parser, target);
13153 return UP(result);
13154}
13155
13166static pm_node_t *
13167parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13168 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13169
13170 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13171 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13172
13173 while (accept1(parser, PM_TOKEN_COMMA)) {
13174 if (accept1(parser, PM_TOKEN_USTAR)) {
13175 // Here we have a splat operator. It can have a name or be
13176 // anonymous. It can be the final target or be in the middle if
13177 // there haven't been any others yet.
13178 if (has_rest) {
13179 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13180 }
13181
13182 pm_token_t star_operator = parser->previous;
13183 pm_node_t *name = NULL;
13184
13185 if (token_begins_expression_p(parser->current.type)) {
13186 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13187 name = parse_target(parser, name, true, true);
13188 }
13189
13190 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13191 pm_multi_target_node_targets_append(parser, result, splat);
13192 has_rest = true;
13193 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13194 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13195 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13196 target = parse_target(parser, target, true, false);
13197
13198 pm_multi_target_node_targets_append(parser, result, target);
13199 context_pop(parser);
13200 } else if (token_begins_expression_p(parser->current.type)) {
13201 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13202 target = parse_target(parser, target, true, false);
13203
13204 pm_multi_target_node_targets_append(parser, result, target);
13205 } else if (!match1(parser, PM_TOKEN_EOF)) {
13206 // If we get here, then we have a trailing , in a multi target node.
13207 // We'll add an implicit rest node to represent this.
13208 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13209 pm_multi_target_node_targets_append(parser, result, rest);
13210 break;
13211 }
13212 }
13213
13214 return UP(result);
13215}
13216
13221static pm_node_t *
13222parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13223 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13224 accept1(parser, PM_TOKEN_NEWLINE);
13225
13226 // Ensure that we have either an = or a ) after the targets.
13227 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13228 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13229 }
13230
13231 return result;
13232}
13233
13237static pm_statements_node_t *
13238parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13239 // First, skip past any optional terminators that might be at the beginning
13240 // of the statements.
13241 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13242
13243 // If we have a terminator, then we can just return NULL.
13244 if (context_terminator(context, &parser->current)) return NULL;
13245
13246 pm_statements_node_t *statements = pm_statements_node_create(parser);
13247
13248 // At this point we know we have at least one statement, and that it
13249 // immediately follows the current token.
13250 context_push(parser, context);
13251
13252 while (true) {
13253 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13254 pm_statements_node_body_append(parser, statements, node, true);
13255
13256 // If we're recovering from a syntax error, then we need to stop parsing
13257 // the statements now.
13258 if (parser->recovering) {
13259 // If this is the level of context where the recovery has happened,
13260 // then we can mark the parser as done recovering.
13261 if (context_terminator(context, &parser->current)) parser->recovering = false;
13262 break;
13263 }
13264
13265 // If we have a terminator, then we will parse all consecutive
13266 // terminators and then continue parsing the statements list.
13267 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13268 // If we have a terminator, then we will continue parsing the
13269 // statements list.
13270 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13271 if (context_terminator(context, &parser->current)) break;
13272
13273 // Now we can continue parsing the list of statements.
13274 continue;
13275 }
13276
13277 // At this point we have a list of statements that are not terminated by
13278 // a newline or semicolon. At this point we need to check if we're at
13279 // the end of the statements list. If we are, then we should break out
13280 // of the loop.
13281 if (context_terminator(context, &parser->current)) break;
13282
13283 // At this point, we have a syntax error, because the statement was not
13284 // terminated by a newline or semicolon, and we're not at the end of the
13285 // statements list. Ideally we should scan forward to determine if we
13286 // should insert a missing terminator or break out of parsing the
13287 // statements list at this point.
13288 //
13289 // We don't have that yet, so instead we'll do a more naive approach. If
13290 // we were unable to parse an expression, then we will skip past this
13291 // token and continue parsing the statements list. Otherwise we'll add
13292 // an error and continue parsing the statements list.
13293 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13294 parser_lex(parser);
13295
13296 // If we are at the end of the file, then we need to stop parsing
13297 // the statements entirely at this point. Mark the parser as
13298 // recovering, as we know that EOF closes the top-level context, and
13299 // then break out of the loop.
13300 if (match1(parser, PM_TOKEN_EOF)) {
13301 parser->recovering = true;
13302 break;
13303 }
13304
13305 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13306 if (context_terminator(context, &parser->current)) break;
13307 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13308 // This is an inlined version of accept1 because the error that we
13309 // want to add has varargs. If this happens again, we should
13310 // probably extract a helper function.
13311 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13312 parser->previous.start = parser->previous.end;
13313 parser->previous.type = 0;
13314 }
13315 }
13316
13317 context_pop(parser);
13318 bool last_value = true;
13319 switch (context) {
13322 last_value = false;
13323 break;
13324 default:
13325 break;
13326 }
13327 pm_void_statements_check(parser, statements, last_value);
13328
13329 return statements;
13330}
13331
13336static void
13337pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13338 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13339
13340 if (duplicated != NULL) {
13341 pm_buffer_t buffer = { 0 };
13342 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13343
13344 pm_diagnostic_list_append_format(
13345 &parser->warning_list,
13346 duplicated->location.start,
13347 duplicated->location.length,
13348 PM_WARN_DUPLICATED_HASH_KEY,
13349 (int) pm_buffer_length(&buffer),
13350 pm_buffer_value(&buffer),
13351 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13352 );
13353
13354 pm_buffer_free(&buffer);
13355 }
13356}
13357
13362static void
13363pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13364 pm_node_t *previous;
13365
13366 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13367 pm_diagnostic_list_append_format(
13368 &parser->warning_list,
13369 PM_NODE_START(node),
13370 PM_NODE_LENGTH(node),
13371 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13372 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13373 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13374 );
13375 }
13376}
13377
13381static bool
13382parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13383 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13384 bool contains_keyword_splat = false;
13385
13386 while (true) {
13387 pm_node_t *element;
13388
13389 switch (parser->current.type) {
13390 case PM_TOKEN_USTAR_STAR: {
13391 parser_lex(parser);
13392 pm_token_t operator = parser->previous;
13393 pm_node_t *value = NULL;
13394
13395 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13396 // If we're about to parse a nested hash that is being
13397 // pushed into this hash directly with **, then we want the
13398 // inner hash to share the static literals with the outer
13399 // hash.
13400 parser->current_hash_keys = literals;
13401 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13402 } else if (token_begins_expression_p(parser->current.type)) {
13403 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13404 } else {
13405 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13406 }
13407
13408 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13409 contains_keyword_splat = true;
13410 break;
13411 }
13412 case PM_TOKEN_LABEL: {
13413 pm_token_t label = parser->current;
13414 parser_lex(parser);
13415
13416 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13417 pm_hash_key_static_literals_add(parser, literals, key);
13418
13419 pm_node_t *value = NULL;
13420
13421 if (token_begins_expression_p(parser->current.type)) {
13422 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13423 } else {
13424 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13425 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13426 value = UP(pm_constant_read_node_create(parser, &constant));
13427 } else {
13428 int depth = -1;
13429 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13430
13431 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13432 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13433 } else {
13434 depth = pm_parser_local_depth(parser, &identifier);
13435 }
13436
13437 if (depth == -1) {
13438 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13439 } else {
13440 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13441 }
13442 }
13443
13444 value->location.length++;
13445 value = UP(pm_implicit_node_create(parser, value));
13446 }
13447
13448 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13449 break;
13450 }
13451 default: {
13452 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13453
13454 // Hash keys that are strings are automatically frozen. We will
13455 // mark that here.
13456 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13457 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13458 }
13459
13460 pm_hash_key_static_literals_add(parser, literals, key);
13461
13462 pm_token_t operator = { 0 };
13463 if (!pm_symbol_node_label_p(parser, key)) {
13464 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13465 operator = parser->previous;
13466 }
13467
13468 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13469 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13470 break;
13471 }
13472 }
13473
13474 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13475 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
13476 } else {
13477 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
13478 }
13479
13480 // If there's no comma after the element, then we're done.
13481 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13482
13483 // If the next element starts with a label or a **, then we know we have
13484 // another element in the hash, so we'll continue parsing.
13485 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13486
13487 // Otherwise we need to check if the subsequent token begins an expression.
13488 // If it does, then we'll continue parsing.
13489 if (token_begins_expression_p(parser->current.type)) continue;
13490
13491 // Otherwise by default we will exit out of this loop.
13492 break;
13493 }
13494
13495 return contains_keyword_splat;
13496}
13497
13498static inline bool
13499argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13500 if (pm_symbol_node_label_p(parser, argument)) {
13501 return true;
13502 }
13503
13504 switch (PM_NODE_TYPE(argument)) {
13505 case PM_CALL_NODE: {
13506 pm_call_node_t *cast = (pm_call_node_t *) argument;
13507 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13508 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13509 return false;
13510 }
13511 if (cast->block != NULL) {
13512 return false;
13513 }
13514 }
13515 break;
13516 }
13517 default: break;
13518 }
13519 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13520}
13521
13525static inline void
13526parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13527 if (arguments->arguments == NULL) {
13528 arguments->arguments = pm_arguments_node_create(parser);
13529 }
13530
13531 pm_arguments_node_arguments_append(arguments->arguments, argument);
13532}
13533
13537static void
13538parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
13539 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13540
13541 // First we need to check if the next token is one that could be the start
13542 // of an argument. If it's not, then we can just return.
13543 if (
13544 match2(parser, terminator, PM_TOKEN_EOF) ||
13545 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13546 context_terminator(parser->current_context->context, &parser->current)
13547 ) {
13548 return;
13549 }
13550
13551 bool parsed_first_argument = false;
13552 bool parsed_bare_hash = false;
13553 bool parsed_block_argument = false;
13554 bool parsed_forwarding_arguments = false;
13555
13556 while (!match1(parser, PM_TOKEN_EOF)) {
13557 if (parsed_forwarding_arguments) {
13558 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13559 }
13560
13561 pm_node_t *argument = NULL;
13562
13563 switch (parser->current.type) {
13564 case PM_TOKEN_USTAR_STAR:
13565 case PM_TOKEN_LABEL: {
13566 if (parsed_bare_hash) {
13567 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13568 }
13569
13570 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13571 argument = UP(hash);
13572
13573 pm_static_literals_t hash_keys = { 0 };
13574 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13575
13576 parse_arguments_append(parser, arguments, argument);
13577
13578 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13579 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13580 pm_node_flag_set(UP(arguments->arguments), flags);
13581
13582 pm_static_literals_free(&hash_keys);
13583 parsed_bare_hash = true;
13584
13585 break;
13586 }
13587 case PM_TOKEN_UAMPERSAND: {
13588 parser_lex(parser);
13589 pm_token_t operator = parser->previous;
13590 pm_node_t *expression = NULL;
13591
13592 if (token_begins_expression_p(parser->current.type)) {
13593 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13594 } else {
13595 pm_parser_scope_forwarding_block_check(parser, &operator);
13596 }
13597
13598 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
13599 if (parsed_block_argument) {
13600 parse_arguments_append(parser, arguments, argument);
13601 } else {
13602 arguments->block = argument;
13603 }
13604
13605 if (match1(parser, PM_TOKEN_COMMA)) {
13606 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13607 }
13608
13609 parsed_block_argument = true;
13610 break;
13611 }
13612 case PM_TOKEN_USTAR: {
13613 parser_lex(parser);
13614 pm_token_t operator = parser->previous;
13615
13616 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13617 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13618 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13619 if (parsed_bare_hash) {
13620 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13621 }
13622 } else {
13623 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13624
13625 if (parsed_bare_hash) {
13626 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13627 }
13628
13629 argument = UP(pm_splat_node_create(parser, &operator, expression));
13630 }
13631
13632 parse_arguments_append(parser, arguments, argument);
13633 break;
13634 }
13635 case PM_TOKEN_UDOT_DOT_DOT: {
13636 if (accepts_forwarding) {
13637 parser_lex(parser);
13638
13639 if (token_begins_expression_p(parser->current.type)) {
13640 // If the token begins an expression then this ... was
13641 // not actually argument forwarding but was instead a
13642 // range.
13643 pm_token_t operator = parser->previous;
13644 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13645
13646 // If we parse a range, we need to validate that we
13647 // didn't accidentally violate the nonassoc rules of the
13648 // ... operator.
13649 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13650 pm_range_node_t *range = (pm_range_node_t *) right;
13651 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13652 }
13653
13654 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13655 } else {
13656 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13657 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13658 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13659 }
13660
13661 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13662 parse_arguments_append(parser, arguments, argument);
13663 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13664 arguments->has_forwarding = true;
13665 parsed_forwarding_arguments = true;
13666 break;
13667 }
13668 }
13669 }
13671 default: {
13672 if (argument == NULL) {
13673 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13674 }
13675
13676 bool contains_keywords = false;
13677 bool contains_keyword_splat = false;
13678
13679 if (argument_allowed_for_bare_hash(parser, argument)) {
13680 if (parsed_bare_hash) {
13681 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13682 }
13683
13684 pm_token_t operator = { 0 };
13685 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13686 operator = parser->previous;
13687 }
13688
13689 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13690 contains_keywords = true;
13691
13692 // Create the set of static literals for this hash.
13693 pm_static_literals_t hash_keys = { 0 };
13694 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13695
13696 // Finish parsing the one we are part way through.
13697 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13698 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13699
13700 pm_keyword_hash_node_elements_append(bare_hash, argument);
13701 argument = UP(bare_hash);
13702
13703 // Then parse more if we have a comma
13704 if (accept1(parser, PM_TOKEN_COMMA) && (
13705 token_begins_expression_p(parser->current.type) ||
13706 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13707 )) {
13708 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13709 }
13710
13711 pm_static_literals_free(&hash_keys);
13712 parsed_bare_hash = true;
13713 }
13714
13715 parse_arguments_append(parser, arguments, argument);
13716
13717 pm_node_flags_t flags = 0;
13718 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13719 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13720 pm_node_flag_set(UP(arguments->arguments), flags);
13721
13722 break;
13723 }
13724 }
13725
13726 parsed_first_argument = true;
13727
13728 // If parsing the argument failed, we need to stop parsing arguments.
13729 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
13730
13731 // If the terminator of these arguments is not EOF, then we have a
13732 // specific token we're looking for. In that case we can accept a
13733 // newline here because it is not functioning as a statement terminator.
13734 bool accepted_newline = false;
13735 if (terminator != PM_TOKEN_EOF) {
13736 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13737 }
13738
13739 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13740 // If we previously were on a comma and we just parsed a bare hash,
13741 // then we want to continue parsing arguments. This is because the
13742 // comma was grabbed up by the hash parser.
13743 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13744 // If there was a comma, then we need to check if we also accepted a
13745 // newline. If we did, then this is a syntax error.
13746 if (accepted_newline) {
13747 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13748 }
13749
13750 // If this is a command call and an argument takes a block,
13751 // there can be no further arguments. For example,
13752 // `foo(bar 1 do end, 2)` should be rejected.
13753 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13754 pm_call_node_t *call = (pm_call_node_t *) argument;
13755 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13756 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13757 break;
13758 }
13759 }
13760 } else {
13761 // If there is no comma at the end of the argument list then we're
13762 // done parsing arguments and can break out of this loop.
13763 break;
13764 }
13765
13766 // If we hit the terminator, then that means we have a trailing comma so
13767 // we can accept that output as well.
13768 if (match1(parser, terminator)) break;
13769 }
13770}
13771
13783parse_required_destructured_parameter(pm_parser_t *parser) {
13784 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13785
13786 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
13787 pm_multi_target_node_opening_set(parser, node, &parser->previous);
13788
13789 do {
13790 pm_node_t *param;
13791
13792 // If we get here then we have a trailing comma, which isn't allowed in
13793 // the grammar. In other places, multi targets _do_ allow trailing
13794 // commas, so here we'll assume this is a mistake of the user not
13795 // knowing it's not allowed here.
13796 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13797 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13798 pm_multi_target_node_targets_append(parser, node, param);
13799 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13800 break;
13801 }
13802
13803 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13804 param = UP(parse_required_destructured_parameter(parser));
13805 } else if (accept1(parser, PM_TOKEN_USTAR)) {
13806 pm_token_t star = parser->previous;
13807 pm_node_t *value = NULL;
13808
13809 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13810 pm_token_t name = parser->previous;
13811 value = UP(pm_required_parameter_node_create(parser, &name));
13812 if (pm_parser_parameter_name_check(parser, &name)) {
13813 pm_node_flag_set_repeated_parameter(value);
13814 }
13815 pm_parser_local_add_token(parser, &name, 1);
13816 }
13817
13818 param = UP(pm_splat_node_create(parser, &star, value));
13819 } else {
13820 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13821 pm_token_t name = parser->previous;
13822
13823 param = UP(pm_required_parameter_node_create(parser, &name));
13824 if (pm_parser_parameter_name_check(parser, &name)) {
13825 pm_node_flag_set_repeated_parameter(param);
13826 }
13827 pm_parser_local_add_token(parser, &name, 1);
13828 }
13829
13830 pm_multi_target_node_targets_append(parser, node, param);
13831 } while (accept1(parser, PM_TOKEN_COMMA));
13832
13833 accept1(parser, PM_TOKEN_NEWLINE);
13834 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13835 pm_multi_target_node_closing_set(parser, node, &parser->previous);
13836
13837 return node;
13838}
13839
/**
 * The states used to validate the order in which parameters appear.
 *
 * The numeric values matter: update_parameter_state compares them, rejecting
 * a token whose state is greater than the current one and lowering the
 * current state when the token's state is smaller.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** State of the block (`&`) and forwarding (`...`) tokens. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** State of the `**` tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** State of label tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** Sits between the keyword and after-optional states. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** State of the `*` tokens, and of named parameters seen after an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** State of the `=` token. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** State of identifier and `(` tokens. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The highest state; no token maps to it. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
13855
13859static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13860 [0] = PM_PARAMETERS_NO_CHANGE,
13861 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13862 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13863 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13864 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13865 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13866 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13867 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13868 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13869 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13870 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13871 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
13872};
13873
13881static bool
13882update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13883 pm_parameters_order_t state = parameters_ordering[token->type];
13884 if (state == PM_PARAMETERS_NO_CHANGE) return true;
13885
13886 // If we see another ordered argument after a optional argument
13887 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13888 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13889 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13890 return true;
13891 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13892 return true;
13893 }
13894
13895 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13896 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13897 return false;
13898 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
13899 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13900 return false;
13901 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13902 // We know what transition we failed on, so we can provide a better error here.
13903 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13904 return false;
13905 }
13906
13907 if (state < *current) *current = state;
13908 return true;
13909}
13910
13911static inline void
13912parse_parameters_handle_trailing_comma(
13913 pm_parser_t *parser,
13914 pm_parameters_node_t *params,
13915 pm_parameters_order_t order,
13916 bool in_block,
13917 bool allows_trailing_comma
13918) {
13919 if (!allows_trailing_comma) {
13920 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13921 return;
13922 }
13923
13924 if (in_block) {
13925 if (order >= PM_PARAMETERS_ORDER_NAMED) {
13926 // foo do |bar,|; end
13927 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13928
13929 if (params->rest == NULL) {
13930 pm_parameters_node_rest_set(params, param);
13931 } else {
13932 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
13933 pm_parameters_node_posts_append(params, UP(param));
13934 }
13935 } else {
13936 // foo do |*bar,|; end
13937 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13938 }
13939 } else {
13940 // https://bugs.ruby-lang.org/issues/19107
13941 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
13942 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
13943 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13944 }
13945 }
13946}
13947
13951static pm_parameters_node_t *
13952parse_parameters(
13953 pm_parser_t *parser,
13954 pm_binding_power_t binding_power,
13955 bool uses_parentheses,
13956 bool allows_trailing_comma,
13957 bool allows_forwarding_parameters,
13958 bool accepts_blocks_in_defaults,
13959 bool in_block,
13960 pm_diagnostic_id_t diag_id_forwarding,
13961 uint16_t depth
13962) {
13963 pm_do_loop_stack_push(parser, false);
13964
13965 pm_parameters_node_t *params = pm_parameters_node_create(parser);
13966 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13967
13968 while (true) {
13969 bool parsing = true;
13970
13971 switch (parser->current.type) {
13972 case PM_TOKEN_PARENTHESIS_LEFT: {
13973 update_parameter_state(parser, &parser->current, &order);
13974 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13975
13976 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13977 pm_parameters_node_requireds_append(params, param);
13978 } else {
13979 pm_parameters_node_posts_append(params, param);
13980 }
13981 break;
13982 }
13983 case PM_TOKEN_UAMPERSAND:
13984 case PM_TOKEN_AMPERSAND: {
13985 update_parameter_state(parser, &parser->current, &order);
13986 parser_lex(parser);
13987
13988 pm_token_t operator = parser->previous;
13989 pm_node_t *param;
13990
13991 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
13992 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
13993 } else {
13994 pm_token_t name = {0};
13995
13996 bool repeated = false;
13997 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13998 name = parser->previous;
13999 repeated = pm_parser_parameter_name_check(parser, &name);
14000 pm_parser_local_add_token(parser, &name, 1);
14001 } else {
14002 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14003 }
14004
14005 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
14006 if (repeated) {
14007 pm_node_flag_set_repeated_parameter(param);
14008 }
14009 }
14010
14011 if (params->block == NULL) {
14012 pm_parameters_node_block_set(params, param);
14013 } else {
14014 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
14015 pm_parameters_node_posts_append(params, param);
14016 }
14017
14018 break;
14019 }
14020 case PM_TOKEN_UDOT_DOT_DOT: {
14021 if (!allows_forwarding_parameters) {
14022 pm_parser_err_current(parser, diag_id_forwarding);
14023 }
14024
14025 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14026 parser_lex(parser);
14027
14028 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14029 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14030
14031 if (params->keyword_rest != NULL) {
14032 // If we already have a keyword rest parameter, then we replace it with the
14033 // forwarding parameter and move the keyword rest parameter to the posts list.
14034 pm_node_t *keyword_rest = params->keyword_rest;
14035 pm_parameters_node_posts_append(params, keyword_rest);
14036 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14037 params->keyword_rest = NULL;
14038 }
14039
14040 pm_parameters_node_keyword_rest_set(params, UP(param));
14041 break;
14042 }
14043 case PM_TOKEN_CLASS_VARIABLE:
14044 case PM_TOKEN_IDENTIFIER:
14045 case PM_TOKEN_CONSTANT:
14046 case PM_TOKEN_INSTANCE_VARIABLE:
14047 case PM_TOKEN_GLOBAL_VARIABLE:
14048 case PM_TOKEN_METHOD_NAME: {
14049 parser_lex(parser);
14050 switch (parser->previous.type) {
14051 case PM_TOKEN_CONSTANT:
14052 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14053 break;
14054 case PM_TOKEN_INSTANCE_VARIABLE:
14055 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14056 break;
14057 case PM_TOKEN_GLOBAL_VARIABLE:
14058 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14059 break;
14060 case PM_TOKEN_CLASS_VARIABLE:
14061 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14062 break;
14063 case PM_TOKEN_METHOD_NAME:
14064 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14065 break;
14066 default: break;
14067 }
14068
14069 if (parser->current.type == PM_TOKEN_EQUAL) {
14070 update_parameter_state(parser, &parser->current, &order);
14071 } else {
14072 update_parameter_state(parser, &parser->previous, &order);
14073 }
14074
14075 pm_token_t name = parser->previous;
14076 bool repeated = pm_parser_parameter_name_check(parser, &name);
14077 pm_parser_local_add_token(parser, &name, 1);
14078
14079 if (match1(parser, PM_TOKEN_EQUAL)) {
14080 pm_token_t operator = parser->current;
14081 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14082 parser_lex(parser);
14083
14084 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14085 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14086
14087 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14088 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14089 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14090
14091 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14092
14093 if (repeated) {
14094 pm_node_flag_set_repeated_parameter(UP(param));
14095 }
14096 pm_parameters_node_optionals_append(params, param);
14097
14098 // If the value of the parameter increased the number of
14099 // reads of that parameter, then we need to warn that we
14100 // have a circular definition.
14101 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14102 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14103 }
14104
14105 context_pop(parser);
14106
14107 // If parsing the value of the parameter resulted in error recovery,
14108 // then we can put a missing node in its place and stop parsing the
14109 // parameters entirely now.
14110 if (parser->recovering) {
14111 parsing = false;
14112 break;
14113 }
14114 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14115 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14116 if (repeated) {
14117 pm_node_flag_set_repeated_parameter(UP(param));
14118 }
14119 pm_parameters_node_requireds_append(params, UP(param));
14120 } else {
14121 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14122 if (repeated) {
14123 pm_node_flag_set_repeated_parameter(UP(param));
14124 }
14125 pm_parameters_node_posts_append(params, UP(param));
14126 }
14127
14128 break;
14129 }
14130 case PM_TOKEN_LABEL: {
14131 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14132 update_parameter_state(parser, &parser->current, &order);
14133
14134 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14135 parser_lex(parser);
14136
14137 pm_token_t name = parser->previous;
14138 pm_token_t local = name;
14139 local.end -= 1;
14140
14141 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14142 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14143 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14144 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14145 }
14146
14147 bool repeated = pm_parser_parameter_name_check(parser, &local);
14148 pm_parser_local_add_token(parser, &local, 1);
14149
14150 switch (parser->current.type) {
14151 case PM_TOKEN_COMMA:
14152 case PM_TOKEN_PARENTHESIS_RIGHT:
14153 case PM_TOKEN_PIPE: {
14154 context_pop(parser);
14155
14156 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14157 if (repeated) {
14158 pm_node_flag_set_repeated_parameter(param);
14159 }
14160
14161 pm_parameters_node_keywords_append(params, param);
14162 break;
14163 }
14164 case PM_TOKEN_SEMICOLON:
14165 case PM_TOKEN_NEWLINE: {
14166 context_pop(parser);
14167
14168 if (uses_parentheses) {
14169 parsing = false;
14170 break;
14171 }
14172
14173 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14174 if (repeated) {
14175 pm_node_flag_set_repeated_parameter(param);
14176 }
14177
14178 pm_parameters_node_keywords_append(params, param);
14179 break;
14180 }
14181 default: {
14182 pm_node_t *param;
14183
14184 if (token_begins_expression_p(parser->current.type)) {
14185 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14186 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14187
14188 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14189 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14190 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14191
14192 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14193 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14194 }
14195
14196 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14197 }
14198 else {
14199 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14200 }
14201
14202 if (repeated) {
14203 pm_node_flag_set_repeated_parameter(param);
14204 }
14205
14206 context_pop(parser);
14207 pm_parameters_node_keywords_append(params, param);
14208
14209 // If parsing the value of the parameter resulted in error recovery,
14210 // then we can put a missing node in its place and stop parsing the
14211 // parameters entirely now.
14212 if (parser->recovering) {
14213 parsing = false;
14214 break;
14215 }
14216 }
14217 }
14218
14219 parser->in_keyword_arg = false;
14220 break;
14221 }
14222 case PM_TOKEN_USTAR:
14223 case PM_TOKEN_STAR: {
14224 update_parameter_state(parser, &parser->current, &order);
14225 parser_lex(parser);
14226
14227 pm_token_t operator = parser->previous;
14228 pm_token_t name = { 0 };
14229 bool repeated = false;
14230
14231 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14232 name = parser->previous;
14233 repeated = pm_parser_parameter_name_check(parser, &name);
14234 pm_parser_local_add_token(parser, &name, 1);
14235 } else {
14236 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14237 }
14238
14239 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14240 if (repeated) {
14241 pm_node_flag_set_repeated_parameter(param);
14242 }
14243
14244 if (params->rest == NULL) {
14245 pm_parameters_node_rest_set(params, param);
14246 } else {
14247 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14248 pm_parameters_node_posts_append(params, param);
14249 }
14250
14251 break;
14252 }
14253 case PM_TOKEN_STAR_STAR:
14254 case PM_TOKEN_USTAR_STAR: {
14255 pm_parameters_order_t previous_order = order;
14256 update_parameter_state(parser, &parser->current, &order);
14257 parser_lex(parser);
14258
14259 pm_token_t operator = parser->previous;
14260 pm_node_t *param;
14261
14262 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14263 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14264 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14265 }
14266
14267 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14268 } else {
14269 pm_token_t name = { 0 };
14270
14271 bool repeated = false;
14272 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14273 name = parser->previous;
14274 repeated = pm_parser_parameter_name_check(parser, &name);
14275 pm_parser_local_add_token(parser, &name, 1);
14276 } else {
14277 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14278 }
14279
14280 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14281 if (repeated) {
14282 pm_node_flag_set_repeated_parameter(param);
14283 }
14284 }
14285
14286 if (params->keyword_rest == NULL) {
14287 pm_parameters_node_keyword_rest_set(params, param);
14288 } else {
14289 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14290 pm_parameters_node_posts_append(params, param);
14291 }
14292
14293 break;
14294 }
14295 default:
14296 if (parser->previous.type == PM_TOKEN_COMMA) {
14297 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14298 }
14299
14300 parsing = false;
14301 break;
14302 }
14303
14304 // If we hit some kind of issue while parsing the parameter, this would
14305 // have been set to false. In that case, we need to break out of the
14306 // loop.
14307 if (!parsing) break;
14308
14309 bool accepted_newline = false;
14310 if (uses_parentheses) {
14311 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14312 }
14313
14314 if (accept1(parser, PM_TOKEN_COMMA)) {
14315 // If there was a comma, but we also accepted a newline, then this
14316 // is a syntax error.
14317 if (accepted_newline) {
14318 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14319 }
14320 } else {
14321 // If there was no comma, then we're done parsing parameters.
14322 break;
14323 }
14324 }
14325
14326 pm_do_loop_stack_pop(parser);
14327
14328 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14329 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14330 pm_node_destroy(parser, UP(params));
14331 return NULL;
14332 }
14333
14334 return params;
14335}
14336
14341static size_t
14342token_newline_index(const pm_parser_t *parser) {
14343 if (parser->heredoc_end == NULL) {
14344 // This is the common case. In this case we can look at the previously
14345 // recorded newline in the newline list and subtract from the current
14346 // offset.
14347 return parser->line_offsets.size - 1;
14348 } else {
14349 // This is unlikely. This is the case that we have already parsed the
14350 // start of a heredoc, so we cannot rely on looking at the previous
14351 // offset of the newline list, and instead must go through the whole
14352 // process of a binary search for the line number.
14353 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14354 }
14355}
14356
14361static int64_t
14362token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14363 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14364 const uint8_t *end = token->start;
14365
14366 // Skip over the BOM if it is present.
14367 if (
14368 newline_index == 0 &&
14369 parser->start[0] == 0xef &&
14370 parser->start[1] == 0xbb &&
14371 parser->start[2] == 0xbf
14372 ) cursor += 3;
14373
14374 int64_t column = 0;
14375 for (; cursor < end; cursor++) {
14376 switch (*cursor) {
14377 case '\t':
14378 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14379 break;
14380 case ' ':
14381 column++;
14382 break;
14383 default:
14384 column++;
14385 if (break_on_non_space) return -1;
14386 break;
14387 }
14388 }
14389
14390 return column;
14391}
14392
14397static void
14398parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14399 // If these warnings are disabled (unlikely), then we can just return.
14400 if (!parser->warn_mismatched_indentation) return;
14401
14402 // If the tokens are on the same line, we do not warn.
14403 size_t closing_newline_index = token_newline_index(parser);
14404 if (opening_newline_index == closing_newline_index) return;
14405
14406 // If the opening token has anything other than spaces or tabs before it,
14407 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14408 // and the `if` immediately follows an `else` keyword.
14409 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14410 if (!if_after_else && (opening_column == -1)) return;
14411
14412 // Get a reference to the closing token off the current parser. This assumes
14413 // that the caller has placed this in the correct position.
14414 pm_token_t *closing_token = &parser->current;
14415
14416 // If the tokens are at the same indentation, we do not warn.
14417 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14418 if ((closing_column == -1) || (opening_column == closing_column)) return;
14419
14420 // If the closing column is greater than the opening column and we are
14421 // allowing indentation, then we do not warn.
14422 if (allow_indent && (closing_column > opening_column)) return;
14423
14424 // Otherwise, add a warning.
14425 PM_PARSER_WARN_FORMAT(
14426 parser,
14427 PM_TOKEN_START(parser, closing_token),
14428 PM_TOKEN_LENGTH(closing_token),
14429 PM_WARN_INDENTATION_MISMATCH,
14430 (int) (closing_token->end - closing_token->start),
14431 (const char *) closing_token->start,
14432 (int) (opening_token->end - opening_token->start),
14433 (const char *) opening_token->start,
14434 ((int32_t) opening_newline_index) + parser->start_line
14435 );
14436}
14437
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * parse_rescues uses this value to pick the context that is active while the
 * statements of each clause are parsed.
 */
typedef enum {
    /** Selects the PM_CONTEXT_BEGIN_* contexts. */
    PM_RESCUES_BEGIN = 1,

    /** Selects the PM_CONTEXT_BLOCK_* contexts. */
    PM_RESCUES_BLOCK = 2,

    /** Selects the PM_CONTEXT_CLASS_* contexts. */
    PM_RESCUES_CLASS = 3,

    /** Selects the PM_CONTEXT_DEF_* contexts. */
    PM_RESCUES_DEF = 4,

    /** Selects the PM_CONTEXT_LAMBDA_* contexts. */
    PM_RESCUES_LAMBDA = 5,

    /** Selects the PM_CONTEXT_MODULE_* contexts. */
    PM_RESCUES_MODULE = 6,

    /** Selects the PM_CONTEXT_SCLASS_* contexts. */
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14447
14452static inline void
14453parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14454 pm_rescue_node_t *current = NULL;
14455
14456 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14457 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14458 parser_lex(parser);
14459
14460 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14461
14462 switch (parser->current.type) {
14463 case PM_TOKEN_EQUAL_GREATER: {
14464 // Here we have an immediate => after the rescue keyword, in which case
14465 // we're going to have an empty list of exceptions to rescue (which
14466 // implies StandardError).
14467 parser_lex(parser);
14468 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14469
14470 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14471 reference = parse_target(parser, reference, false, false);
14472
14473 pm_rescue_node_reference_set(rescue, reference);
14474 break;
14475 }
14476 case PM_TOKEN_NEWLINE:
14477 case PM_TOKEN_SEMICOLON:
14478 case PM_TOKEN_KEYWORD_THEN:
14479 // Here we have a terminator for the rescue keyword, in which
14480 // case we're going to just continue on.
14481 break;
14482 default: {
14483 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14484 // Here we have something that could be an exception expression, so
14485 // we'll attempt to parse it here and any others delimited by commas.
14486
14487 do {
14488 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14489 pm_rescue_node_exceptions_append(rescue, expression);
14490
14491 // If we hit a newline, then this is the end of the rescue expression. We
14492 // can continue on to parse the statements.
14493 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14494
14495 // If we hit a `=>` then we're going to parse the exception variable. Once
14496 // we've done that, we'll break out of the loop and parse the statements.
14497 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14498 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14499
14500 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14501 reference = parse_target(parser, reference, false, false);
14502
14503 pm_rescue_node_reference_set(rescue, reference);
14504 break;
14505 }
14506 } while (accept1(parser, PM_TOKEN_COMMA));
14507 }
14508 }
14509 }
14510
14511 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14512 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14513 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14514 }
14515 } else {
14516 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14517 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14518 }
14519
14520 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14521 pm_accepts_block_stack_push(parser, true);
14522 pm_context_t context;
14523
14524 switch (type) {
14525 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14526 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14527 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14528 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14529 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14530 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14531 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14532 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14533 }
14534
14535 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14536 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14537
14538 pm_accepts_block_stack_pop(parser);
14539 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14540 }
14541
14542 if (current == NULL) {
14543 pm_begin_node_rescue_clause_set(parent_node, rescue);
14544 } else {
14545 pm_rescue_node_subsequent_set(current, rescue);
14546 }
14547
14548 current = rescue;
14549 }
14550
14551 // The end node locations on rescue nodes will not be set correctly
14552 // since we won't know the end until we've found all subsequent
14553 // clauses. This sets the end location on all rescues once we know it.
14554 if (current != NULL) {
14555 pm_rescue_node_t *clause = parent_node->rescue_clause;
14556
14557 while (clause != NULL) {
14558 PM_NODE_LENGTH_SET_NODE(clause, current);
14559 clause = clause->subsequent;
14560 }
14561 }
14562
14563 pm_token_t else_keyword;
14564 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14565 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14566 opening_newline_index = token_newline_index(parser);
14567
14568 else_keyword = parser->current;
14569 opening = &else_keyword;
14570
14571 parser_lex(parser);
14572 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14573
14574 pm_statements_node_t *else_statements = NULL;
14575 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14576 pm_accepts_block_stack_push(parser, true);
14577 pm_context_t context;
14578
14579 switch (type) {
14580 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14581 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14582 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14583 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14584 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14585 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14586 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14587 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14588 }
14589
14590 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14591 pm_accepts_block_stack_pop(parser);
14592
14593 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14594 }
14595
14596 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14597 pm_begin_node_else_clause_set(parent_node, else_clause);
14598
14599 // If we don't have a `current` rescue node, then this is a dangling
14600 // else, and it's an error.
14601 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14602 }
14603
14604 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14605 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14606 pm_token_t ensure_keyword = parser->current;
14607
14608 parser_lex(parser);
14609 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14610
14611 pm_statements_node_t *ensure_statements = NULL;
14612 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14613 pm_accepts_block_stack_push(parser, true);
14614 pm_context_t context;
14615
14616 switch (type) {
14617 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14618 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14619 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14620 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14621 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14622 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14623 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14624 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14625 }
14626
14627 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14628 pm_accepts_block_stack_pop(parser);
14629
14630 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14631 }
14632
14633 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14634 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14635 }
14636
14637 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14638 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14639 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14640 } else {
14641 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14642 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14643 }
14644}
14645
14650static pm_begin_node_t *
14651parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14652 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14653 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14654
14655 node->base.location.start = U32(start - parser->start);
14656 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14657
14658 return node;
14659}
14660
14665parse_block_parameters(
14666 pm_parser_t *parser,
14667 bool allows_trailing_comma,
14668 const pm_token_t *opening,
14669 bool is_lambda_literal,
14670 bool accepts_blocks_in_defaults,
14671 uint16_t depth
14672) {
14673 pm_parameters_node_t *parameters = NULL;
14674 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14675 if (!is_lambda_literal) {
14676 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14677 }
14678 parameters = parse_parameters(
14679 parser,
14680 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14681 false,
14682 allows_trailing_comma,
14683 false,
14684 accepts_blocks_in_defaults,
14685 true,
14686 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14687 (uint16_t) (depth + 1)
14688 );
14689 if (!is_lambda_literal) {
14690 context_pop(parser);
14691 }
14692 }
14693
14694 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14695 if (opening != NULL) {
14696 accept1(parser, PM_TOKEN_NEWLINE);
14697
14698 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14699 do {
14700 switch (parser->current.type) {
14701 case PM_TOKEN_CONSTANT:
14702 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14703 parser_lex(parser);
14704 break;
14705 case PM_TOKEN_INSTANCE_VARIABLE:
14706 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14707 parser_lex(parser);
14708 break;
14709 case PM_TOKEN_GLOBAL_VARIABLE:
14710 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14711 parser_lex(parser);
14712 break;
14713 case PM_TOKEN_CLASS_VARIABLE:
14714 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14715 parser_lex(parser);
14716 break;
14717 default:
14718 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14719 break;
14720 }
14721
14722 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14723 pm_parser_local_add_token(parser, &parser->previous, 1);
14724
14725 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14726 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14727
14728 pm_block_parameters_node_append_local(block_parameters, local);
14729 } while (accept1(parser, PM_TOKEN_COMMA));
14730 }
14731 }
14732
14733 return block_parameters;
14734}
14735
14740static bool
14741outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14742 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14743 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14744 }
14745
14746 return false;
14747}
14748
/**
 * The names of the numbered parameters, `_1` through `_9`. The name for
 * numbered parameter N is stored at index N - 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
14757
14763static pm_node_t *
14764parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14765 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14766
14767 // If we have ordinary parameters, then we will return them as the set of
14768 // parameters.
14769 if (parameters != NULL) {
14770 // If we also have implicit parameters, then this is an error.
14771 if (implicit_parameters->size > 0) {
14772 pm_node_t *node = implicit_parameters->nodes[0];
14773
14774 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14775 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14776 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14777 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14778 } else {
14779 assert(false && "unreachable");
14780 }
14781 }
14782
14783 return parameters;
14784 }
14785
14786 // If we don't have any implicit parameters, then the set of parameters is
14787 // NULL.
14788 if (implicit_parameters->size == 0) {
14789 return NULL;
14790 }
14791
14792 // If we don't have ordinary parameters, then we now must validate our set
14793 // of implicit parameters. We can only have numbered parameters or it, but
14794 // they cannot be mixed.
14795 uint8_t numbered_parameter = 0;
14796 bool it_parameter = false;
14797
14798 for (size_t index = 0; index < implicit_parameters->size; index++) {
14799 pm_node_t *node = implicit_parameters->nodes[index];
14800
14801 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14802 if (it_parameter) {
14803 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14804 } else if (outer_scope_using_numbered_parameters_p(parser)) {
14805 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14806 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
14807 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14808 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
14809 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
14810 } else {
14811 assert(false && "unreachable");
14812 }
14813 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14814 if (numbered_parameter > 0) {
14815 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14816 } else {
14817 it_parameter = true;
14818 }
14819 }
14820 }
14821
14822 if (numbered_parameter > 0) {
14823 // Go through the parent scopes and mark them as being disallowed from
14824 // using numbered parameters because this inner scope is using them.
14825 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14826 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14827 }
14828 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
14829 }
14830
14831 if (it_parameter) {
14832 return UP(pm_it_parameters_node_create(parser, opening, closing));
14833 }
14834
14835 return NULL;
14836}
14837
14841static pm_block_node_t *
14842parse_block(pm_parser_t *parser, uint16_t depth) {
14843 pm_token_t opening = parser->previous;
14844 accept1(parser, PM_TOKEN_NEWLINE);
14845
14846 pm_accepts_block_stack_push(parser, true);
14847 pm_parser_scope_push(parser, false);
14848
14849 pm_block_parameters_node_t *block_parameters = NULL;
14850
14851 if (accept1(parser, PM_TOKEN_PIPE)) {
14852 pm_token_t block_parameters_opening = parser->previous;
14853 if (match1(parser, PM_TOKEN_PIPE)) {
14854 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14855 parser->command_start = true;
14856 parser_lex(parser);
14857 } else {
14858 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
14859 accept1(parser, PM_TOKEN_NEWLINE);
14860 parser->command_start = true;
14861 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14862 }
14863
14864 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
14865 }
14866
14867 accept1(parser, PM_TOKEN_NEWLINE);
14868 pm_node_t *statements = NULL;
14869
14870 if (opening.type == PM_TOKEN_BRACE_LEFT) {
14871 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14872 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
14873 }
14874
14875 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
14876 } else {
14877 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14878 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14879 pm_accepts_block_stack_push(parser, true);
14880 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
14881 pm_accepts_block_stack_pop(parser);
14882 }
14883
14884 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14885 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14886 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14887 }
14888 }
14889
14890 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
14891 }
14892
14893 pm_constant_id_list_t locals;
14894 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
14895 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
14896
14897 pm_parser_scope_pop(parser);
14898 pm_accepts_block_stack_pop(parser);
14899
14900 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
14901}
14902
14908static bool
14909parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
14910 bool found = false;
14911
14912 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14913 found |= true;
14914 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
14915
14916 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14917 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14918 } else {
14919 pm_accepts_block_stack_push(parser, true);
14920 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14921
14922 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14923 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
14924 parser->previous.start = parser->previous.end;
14925 parser->previous.type = 0;
14926 }
14927
14928 pm_accepts_block_stack_pop(parser);
14929 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
14930 }
14931 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
14932 found |= true;
14933 pm_accepts_block_stack_push(parser, false);
14934
14935 // If we get here, then the subsequent token cannot be used as an infix
14936 // operator. In this case we assume the subsequent token is part of an
14937 // argument to this method call.
14938 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
14939
14940 // If we have done with the arguments and still not consumed the comma,
14941 // then we have a trailing comma where we need to check whether it is
14942 // allowed or not.
14943 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14944 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
14945 }
14946
14947 pm_accepts_block_stack_pop(parser);
14948 }
14949
14950 // If we're at the end of the arguments, we can now check if there is a block
14951 // node that starts with a {. If there is, then we can parse it and add it to
14952 // the arguments.
14953 if (accepts_block) {
14954 pm_block_node_t *block = NULL;
14955
14956 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14957 found |= true;
14958 block = parse_block(parser, (uint16_t) (depth + 1));
14959 pm_arguments_validate_block(parser, arguments, block);
14960 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14961 found |= true;
14962 block = parse_block(parser, (uint16_t) (depth + 1));
14963 }
14964
14965 if (block != NULL) {
14966 if (arguments->block == NULL && !arguments->has_forwarding) {
14967 arguments->block = UP(block);
14968 } else {
14969 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14970
14971 if (arguments->block != NULL) {
14972 if (arguments->arguments == NULL) {
14973 arguments->arguments = pm_arguments_node_create(parser);
14974 }
14975 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
14976 }
14977 arguments->block = UP(block);
14978 }
14979 }
14980 }
14981
14982 return found;
14983}
14984
14989static void
14990parse_return(pm_parser_t *parser, pm_node_t *node) {
14991 bool in_sclass = false;
14992 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
14993 switch (context_node->context) {
14997 case PM_CONTEXT_BEGIN:
14998 case PM_CONTEXT_CASE_IN:
15001 case PM_CONTEXT_DEFINED:
15002 case PM_CONTEXT_ELSE:
15003 case PM_CONTEXT_ELSIF:
15004 case PM_CONTEXT_EMBEXPR:
15006 case PM_CONTEXT_FOR:
15007 case PM_CONTEXT_IF:
15009 case PM_CONTEXT_MAIN:
15011 case PM_CONTEXT_PARENS:
15012 case PM_CONTEXT_POSTEXE:
15014 case PM_CONTEXT_PREEXE:
15016 case PM_CONTEXT_TERNARY:
15017 case PM_CONTEXT_UNLESS:
15018 case PM_CONTEXT_UNTIL:
15019 case PM_CONTEXT_WHILE:
15020 // Keep iterating up the lists of contexts, because returns can
15021 // see through these.
15022 continue;
15026 case PM_CONTEXT_SCLASS:
15027 in_sclass = true;
15028 continue;
15032 case PM_CONTEXT_CLASS:
15036 case PM_CONTEXT_MODULE:
15037 // These contexts are invalid for a return.
15038 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15039 return;
15050 case PM_CONTEXT_DEF:
15056 // These contexts are valid for a return, and we should not
15057 // continue to loop.
15058 return;
15059 case PM_CONTEXT_NONE:
15060 // This case should never happen.
15061 assert(false && "unreachable");
15062 break;
15063 }
15064 }
15065 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
15066 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15067 }
15068}
15069
15074static void
15075parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15076 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15077 switch (context_node->context) {
15084 case PM_CONTEXT_DEFINED:
15085 case PM_CONTEXT_FOR:
15092 case PM_CONTEXT_POSTEXE:
15093 case PM_CONTEXT_UNTIL:
15094 case PM_CONTEXT_WHILE:
15095 // These are the good cases. We're allowed to have a block exit
15096 // in these contexts.
15097 return;
15098 case PM_CONTEXT_DEF:
15103 case PM_CONTEXT_MAIN:
15104 case PM_CONTEXT_PREEXE:
15105 case PM_CONTEXT_SCLASS:
15109 // These are the bad cases. We're not allowed to have a block
15110 // exit in these contexts.
15111 //
15112 // If we get here, then we're about to mark this block exit
15113 // as invalid. However, it could later _become_ valid if we
15114 // find a trailing while/until on the expression. In this
15115 // case instead of adding the error here, we'll add the
15116 // block exit to the list of exits for the expression, and
15117 // the node parsing will handle validating it instead.
15118 assert(parser->current_block_exits != NULL);
15119 pm_node_list_append(parser->current_block_exits, node);
15120 return;
15124 case PM_CONTEXT_BEGIN:
15125 case PM_CONTEXT_CASE_IN:
15130 case PM_CONTEXT_CLASS:
15132 case PM_CONTEXT_ELSE:
15133 case PM_CONTEXT_ELSIF:
15134 case PM_CONTEXT_EMBEXPR:
15136 case PM_CONTEXT_IF:
15140 case PM_CONTEXT_MODULE:
15142 case PM_CONTEXT_PARENS:
15145 case PM_CONTEXT_TERNARY:
15146 case PM_CONTEXT_UNLESS:
15147 // In these contexts we should continue walking up the list of
15148 // contexts.
15149 break;
15150 case PM_CONTEXT_NONE:
15151 // This case should never happen.
15152 assert(false && "unreachable");
15153 break;
15154 }
15155 }
15156}
15157
15162static pm_node_list_t *
15163push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15164 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15165 parser->current_block_exits = current_block_exits;
15166 return previous_block_exits;
15167}
15168
15174static void
15175flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15176 pm_node_t *block_exit;
15177 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15178 const char *type;
15179
15180 switch (PM_NODE_TYPE(block_exit)) {
15181 case PM_BREAK_NODE: type = "break"; break;
15182 case PM_NEXT_NODE: type = "next"; break;
15183 case PM_REDO_NODE: type = "redo"; break;
15184 default: assert(false && "unreachable"); type = ""; break;
15185 }
15186
15187 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15188 }
15189
15190 parser->current_block_exits = previous_block_exits;
15191}
15192
15197static void
15198pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15199 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15200 // If we matched a trailing while/until, then all of the block exits in
15201 // the contained list are valid. In this case we do not need to do
15202 // anything.
15203 parser->current_block_exits = previous_block_exits;
15204 } else if (previous_block_exits != NULL) {
15205 // If we did not matching a trailing while/until, then all of the block
15206 // exits contained in the list are invalid for this specific context.
15207 // However, they could still become valid in a higher level context if
15208 // there is another list above this one. In this case we'll push all of
15209 // the block exits up to the previous list.
15210 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15211 parser->current_block_exits = previous_block_exits;
15212 } else {
15213 // If we did not match a trailing while/until and this was the last
15214 // chance to do so, then all of the block exits in the list are invalid
15215 // and we need to add an error for each of them.
15216 flush_block_exits(parser, previous_block_exits);
15217 }
15218}
15219
15220static inline pm_node_t *
15221parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15222 context_push(parser, PM_CONTEXT_PREDICATE);
15223 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15224 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15225
15226 // Predicates are closed by a term, a "then", or a term and then a "then".
15227 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15228
15229 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15230 predicate_closed = true;
15231 *then_keyword = parser->previous;
15232 }
15233
15234 if (!predicate_closed) {
15235 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15236 }
15237
15238 context_pop(parser);
15239 return predicate;
15240}
15241
15242static inline pm_node_t *
15243parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15244 pm_node_list_t current_block_exits = { 0 };
15245 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15246
15247 pm_token_t keyword = parser->previous;
15248 pm_token_t then_keyword = { 0 };
15249
15250 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15251 pm_statements_node_t *statements = NULL;
15252
15253 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15254 pm_accepts_block_stack_push(parser, true);
15255 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15256 pm_accepts_block_stack_pop(parser);
15257 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15258 }
15259
15260 pm_node_t *parent = NULL;
15261
15262 switch (context) {
15263 case PM_CONTEXT_IF:
15264 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15265 break;
15266 case PM_CONTEXT_UNLESS:
15267 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15268 break;
15269 default:
15270 assert(false && "unreachable");
15271 break;
15272 }
15273
15274 pm_node_t *current = parent;
15275
15276 // Parse any number of elsif clauses. This will form a linked list of if
15277 // nodes pointing to each other from the top.
15278 if (context == PM_CONTEXT_IF) {
15279 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15280 if (parser_end_of_line_p(parser)) {
15281 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15282 }
15283
15284 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15285 pm_token_t elsif_keyword = parser->current;
15286 parser_lex(parser);
15287
15288 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15289 pm_accepts_block_stack_push(parser, true);
15290
15291 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15292 pm_accepts_block_stack_pop(parser);
15293 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15294
15295 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15296 ((pm_if_node_t *) current)->subsequent = elsif;
15297 current = elsif;
15298 }
15299 }
15300
15301 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15302 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15303 opening_newline_index = token_newline_index(parser);
15304
15305 parser_lex(parser);
15306 pm_token_t else_keyword = parser->previous;
15307
15308 pm_accepts_block_stack_push(parser, true);
15309 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15310 pm_accepts_block_stack_pop(parser);
15311
15312 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15313 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15314 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15315
15316 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15317
15318 switch (context) {
15319 case PM_CONTEXT_IF:
15320 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15321 break;
15322 case PM_CONTEXT_UNLESS:
15323 ((pm_unless_node_t *) parent)->else_clause = else_node;
15324 break;
15325 default:
15326 assert(false && "unreachable");
15327 break;
15328 }
15329 } else {
15330 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15331 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15332 }
15333
15334 // Set the appropriate end location for all of the nodes in the subtree.
15335 switch (context) {
15336 case PM_CONTEXT_IF: {
15337 pm_node_t *current = parent;
15338 bool recursing = true;
15339
15340 while (recursing) {
15341 switch (PM_NODE_TYPE(current)) {
15342 case PM_IF_NODE:
15343 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15344 current = ((pm_if_node_t *) current)->subsequent;
15345 recursing = current != NULL;
15346 break;
15347 case PM_ELSE_NODE:
15348 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15349 recursing = false;
15350 break;
15351 default: {
15352 recursing = false;
15353 break;
15354 }
15355 }
15356 }
15357 break;
15358 }
15359 case PM_CONTEXT_UNLESS:
15360 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15361 break;
15362 default:
15363 assert(false && "unreachable");
15364 break;
15365 }
15366
15367 pop_block_exits(parser, previous_block_exits);
15368 pm_node_list_free(&current_block_exits);
15369
15370 return parent;
15371}
15372
/**
 * A run of case labels covering the PM_TOKEN_KEYWORD_* token types listed
 * below. The leading `case` and the trailing colon are deliberately left out,
 * so the macro is written as `case PM_CASE_KEYWORD:` inside a switch over a
 * token type.
 */
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15388
/**
 * A run of case labels covering the operator token types listed below. The
 * leading `case` and the trailing colon are deliberately left out, so the
 * macro is written as `case PM_CASE_OPERATOR:` inside a switch over a token
 * type. In this file such tokens are turned into symbols through
 * parse_operator_symbol.
 */
#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15401
/**
 * A run of case labels (leading `case` and trailing colon left out, so it is
 * written as `case PM_CASE_PRIMITIVE:`) covering the token types listed
 * below: numeric literals, the tokens that open symbol, regexp, string,
 * xstring, %-list, and heredoc literals, a handful of keyword values, the
 * lambda arrow, and character literals.
 *
 * NOTE(review): judging by the name these are presumably the tokens that can
 * begin a primitive value -- confirm against the places the macro is used.
 * Two of its labels (PM_TOKEN_BACKTICK and PM_TOKEN_UMINUS_NUM) also appear
 * in PM_CASE_OPERATOR, so the two macros cannot share one switch.
 */
#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15416
/**
 * A run of case labels (leading `case` and trailing colon left out, so it is
 * written as `case PM_CASE_PARAMETER:`) covering the token types listed
 * below.
 *
 * NOTE(review): judging by the name these are presumably the tokens that can
 * begin a parameter -- confirm against the places the macro is used.
 */
#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15425
/**
 * A run of case labels (leading `case` and trailing colon left out, so it is
 * written as `case PM_CASE_WRITABLE:`). Unlike the other PM_CASE_* macros
 * above, the labels here are node types rather than token types, so this
 * macro belongs in a switch over a node type.
 *
 * NOTE(review): judging by the name these are presumably the nodes that can
 * be turned into the target of a write -- confirm against the places the
 * macro is used.
 */
#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15434
// Compile-time check that the forced-UTF-8 flag has the same numeric value in
// the string flags as it does in the encoding flags. Because the values are
// the same, we can safely switch the type of a node without having to move
// the flags.
PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15438
15443static inline pm_node_flags_t
15444parse_unescaped_encoding(const pm_parser_t *parser) {
15445 if (parser->explicit_encoding != NULL) {
15447 // If the there's an explicit encoding and it's using a UTF-8 escape
15448 // sequence, then mark the string as UTF-8.
15449 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15450 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15451 // If there's a non-UTF-8 escape sequence being used, then the
15452 // string uses the source encoding, unless the source is marked as
15453 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15454 // order to keep the string valid.
15455 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15456 }
15457 }
15458 return 0;
15459}
15460
15465static pm_node_t *
15466parse_string_part(pm_parser_t *parser, uint16_t depth) {
15467 switch (parser->current.type) {
15468 // Here the lexer has returned to us plain string content. In this case
15469 // we'll create a string node that has no opening or closing and return that
15470 // as the part. These kinds of parts look like:
15471 //
15472 // "aaa #{bbb} #@ccc ddd"
15473 // ^^^^ ^ ^^^^
15474 case PM_TOKEN_STRING_CONTENT: {
15475 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15476 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15477
15478 parser_lex(parser);
15479 return node;
15480 }
15481 // Here the lexer has returned the beginning of an embedded expression. In
15482 // that case we'll parse the inner statements and return that as the part.
15483 // These kinds of parts look like:
15484 //
15485 // "aaa #{bbb} #@ccc ddd"
15486 // ^^^^^^
15487 case PM_TOKEN_EMBEXPR_BEGIN: {
15488 // Ruby disallows seeing encoding around interpolation in strings,
15489 // even though it is known at parse time.
15490 parser->explicit_encoding = NULL;
15491
15492 pm_lex_state_t state = parser->lex_state;
15493 int brace_nesting = parser->brace_nesting;
15494
15495 parser->brace_nesting = 0;
15496 lex_state_set(parser, PM_LEX_STATE_BEG);
15497 parser_lex(parser);
15498
15499 pm_token_t opening = parser->previous;
15500 pm_statements_node_t *statements = NULL;
15501
15502 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15503 pm_accepts_block_stack_push(parser, true);
15504 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15505 pm_accepts_block_stack_pop(parser);
15506 }
15507
15508 parser->brace_nesting = brace_nesting;
15509 lex_state_set(parser, state);
15510 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15511
15512 // If this set of embedded statements only contains a single
15513 // statement, then Ruby does not consider it as a possible statement
15514 // that could emit a line event.
15515 if (statements != NULL && statements->body.size == 1) {
15516 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15517 }
15518
15519 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15520 }
15521
15522 // Here the lexer has returned the beginning of an embedded variable.
15523 // In that case we'll parse the variable and create an appropriate node
15524 // for it and then return that node. These kinds of parts look like:
15525 //
15526 // "aaa #{bbb} #@ccc ddd"
15527 // ^^^^^
15528 case PM_TOKEN_EMBVAR: {
15529 // Ruby disallows seeing encoding around interpolation in strings,
15530 // even though it is known at parse time.
15531 parser->explicit_encoding = NULL;
15532
15533 lex_state_set(parser, PM_LEX_STATE_BEG);
15534 parser_lex(parser);
15535
15536 pm_token_t operator = parser->previous;
15537 pm_node_t *variable;
15538
15539 switch (parser->current.type) {
15540 // In this case a back reference is being interpolated. We'll
15541 // create a global variable read node.
15542 case PM_TOKEN_BACK_REFERENCE:
15543 parser_lex(parser);
15544 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15545 break;
15546 // In this case an nth reference is being interpolated. We'll
15547 // create a global variable read node.
15548 case PM_TOKEN_NUMBERED_REFERENCE:
15549 parser_lex(parser);
15550 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15551 break;
15552 // In this case a global variable is being interpolated. We'll
15553 // create a global variable read node.
15554 case PM_TOKEN_GLOBAL_VARIABLE:
15555 parser_lex(parser);
15556 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15557 break;
15558 // In this case an instance variable is being interpolated.
15559 // We'll create an instance variable read node.
15560 case PM_TOKEN_INSTANCE_VARIABLE:
15561 parser_lex(parser);
15562 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15563 break;
15564 // In this case a class variable is being interpolated. We'll
15565 // create a class variable read node.
15566 case PM_TOKEN_CLASS_VARIABLE:
15567 parser_lex(parser);
15568 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15569 break;
15570 // We can hit here if we got an invalid token. In that case
15571 // we'll not attempt to lex this token and instead just return a
15572 // missing node.
15573 default:
15574 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15575 variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15576 break;
15577 }
15578
15579 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15580 }
15581 default:
15582 parser_lex(parser);
15583 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15584 return NULL;
15585 }
15586}
15587
15593static const uint8_t *
15594parse_operator_symbol_name(const pm_token_t *name) {
15595 switch (name->type) {
15596 case PM_TOKEN_TILDE:
15597 case PM_TOKEN_BANG:
15598 if (name->end[-1] == '@') return name->end - 1;
15600 default:
15601 return name->end;
15602 }
15603}
15604
15605static pm_node_t *
15606parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15607 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15608 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15609
15610 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15611 parser_lex(parser);
15612
15613 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15614 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15615
15616 return UP(symbol);
15617}
15618
15624static pm_node_t *
15625parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15626 const pm_token_t opening = parser->previous;
15627
15628 if (lex_mode->mode != PM_LEX_STRING) {
15629 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15630
15631 switch (parser->current.type) {
15632 case PM_CASE_OPERATOR:
15633 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15634 case PM_TOKEN_IDENTIFIER:
15635 case PM_TOKEN_CONSTANT:
15636 case PM_TOKEN_INSTANCE_VARIABLE:
15637 case PM_TOKEN_METHOD_NAME:
15638 case PM_TOKEN_CLASS_VARIABLE:
15639 case PM_TOKEN_GLOBAL_VARIABLE:
15640 case PM_TOKEN_NUMBERED_REFERENCE:
15641 case PM_TOKEN_BACK_REFERENCE:
15642 case PM_CASE_KEYWORD:
15643 parser_lex(parser);
15644 break;
15645 default:
15646 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15647 break;
15648 }
15649
15650 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15651 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15652 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15653
15654 return UP(symbol);
15655 }
15656
15657 if (lex_mode->as.string.interpolation) {
15658 // If we have the end of the symbol, then we can return an empty symbol.
15659 if (match1(parser, PM_TOKEN_STRING_END)) {
15660 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15661 parser_lex(parser);
15662 pm_token_t content = {
15663 .type = PM_TOKEN_STRING_CONTENT,
15664 .start = parser->previous.start,
15665 .end = parser->previous.start
15666 };
15667
15668 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15669 }
15670
15671 // Now we can parse the first part of the symbol.
15672 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15673
15674 // If we got a string part, then it's possible that we could transform
15675 // what looks like an interpolated symbol into a regular symbol.
15676 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15677 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15678 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15679
15680 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15681 }
15682
15683 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15684 if (part) pm_interpolated_symbol_node_append(symbol, part);
15685
15686 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15687 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15688 pm_interpolated_symbol_node_append(symbol, part);
15689 }
15690 }
15691
15692 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15693 if (match1(parser, PM_TOKEN_EOF)) {
15694 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15695 } else {
15696 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15697 }
15698
15699 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15700 return UP(symbol);
15701 }
15702
15703 pm_token_t content;
15704 pm_string_t unescaped;
15705
15706 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15707 content = parser->current;
15708 unescaped = parser->current_string;
15709 parser_lex(parser);
15710
15711 // If we have two string contents in a row, then the content of this
15712 // symbol is split because of heredoc contents. This looks like:
15713 //
15714 // <<A; :'a
15715 // A
15716 // b'
15717 //
15718 // In this case, the best way we have to represent this is as an
15719 // interpolated string node, so that's what we'll do here.
15720 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15721 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15722 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15723 pm_interpolated_symbol_node_append(symbol, part);
15724
15725 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15726 pm_interpolated_symbol_node_append(symbol, part);
15727
15728 if (next_state != PM_LEX_STATE_NONE) {
15729 lex_state_set(parser, next_state);
15730 }
15731
15732 parser_lex(parser);
15733 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15734
15735 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15736 return UP(symbol);
15737 }
15738 } else {
15739 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15740 pm_string_shared_init(&unescaped, content.start, content.end);
15741 }
15742
15743 if (next_state != PM_LEX_STATE_NONE) {
15744 lex_state_set(parser, next_state);
15745 }
15746
15747 if (match1(parser, PM_TOKEN_EOF)) {
15748 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15749 } else {
15750 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15751 }
15752
15753 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15754}
15755
15760static inline pm_node_t *
15761parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15762 switch (parser->current.type) {
15763 case PM_CASE_OPERATOR:
15764 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
15765 case PM_CASE_KEYWORD:
15766 case PM_TOKEN_CONSTANT:
15767 case PM_TOKEN_IDENTIFIER:
15768 case PM_TOKEN_METHOD_NAME: {
15769 parser_lex(parser);
15770
15771 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15772 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15773 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15774
15775 return UP(symbol);
15776 }
15777 case PM_TOKEN_SYMBOL_BEGIN: {
15778 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15779 parser_lex(parser);
15780
15781 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15782 }
15783 default:
15784 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15785 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15786 }
15787}
15788
15795static inline pm_node_t *
15796parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
15797 switch (parser->current.type) {
15798 case PM_CASE_OPERATOR:
15799 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15800 case PM_CASE_KEYWORD:
15801 case PM_TOKEN_CONSTANT:
15802 case PM_TOKEN_IDENTIFIER:
15803 case PM_TOKEN_METHOD_NAME: {
15804 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15805 parser_lex(parser);
15806
15807 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
15808 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15809 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15810
15811 return UP(symbol);
15812 }
15813 case PM_TOKEN_SYMBOL_BEGIN: {
15814 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15815 parser_lex(parser);
15816
15817 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15818 }
15819 case PM_TOKEN_BACK_REFERENCE:
15820 parser_lex(parser);
15821 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
15822 case PM_TOKEN_NUMBERED_REFERENCE:
15823 parser_lex(parser);
15824 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15825 case PM_TOKEN_GLOBAL_VARIABLE:
15826 parser_lex(parser);
15827 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
15828 default:
15829 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15830 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15831 }
15832}
15833
15838static pm_node_t *
15839parse_variable(pm_parser_t *parser) {
15840 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
15841 int depth;
15842 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
15843
15844 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15845 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
15846 }
15847
15848 pm_scope_t *current_scope = parser->current_scope;
15849 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15850 if (is_numbered_param) {
15851 // When you use a numbered parameter, it implies the existence of
15852 // all of the locals that exist before it. For example, referencing
15853 // _2 means that _1 must exist. Therefore here we loop through all
15854 // of the possibilities and add them into the constant pool.
15855 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
15856 for (uint8_t number = 1; number <= maximum; number++) {
15857 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15858 }
15859
15860 if (!match1(parser, PM_TOKEN_EQUAL)) {
15861 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
15862 }
15863
15864 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
15865 pm_node_list_append(&current_scope->implicit_parameters, node);
15866
15867 return node;
15868 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
15869 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
15870 pm_node_list_append(&current_scope->implicit_parameters, node);
15871
15872 return node;
15873 }
15874 }
15875
15876 return NULL;
15877}
15878
15882static pm_node_t *
15883parse_variable_call(pm_parser_t *parser) {
15884 pm_node_flags_t flags = 0;
15885
15886 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
15887 pm_node_t *node = parse_variable(parser);
15888 if (node != NULL) return node;
15889 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15890 }
15891
15892 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
15893 pm_node_flag_set(UP(node), flags);
15894
15895 return UP(node);
15896}
15897
15903static inline pm_token_t
15904parse_method_definition_name(pm_parser_t *parser) {
15905 switch (parser->current.type) {
15906 case PM_CASE_KEYWORD:
15907 case PM_TOKEN_CONSTANT:
15908 case PM_TOKEN_METHOD_NAME:
15909 parser_lex(parser);
15910 return parser->previous;
15911 case PM_TOKEN_IDENTIFIER:
15912 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
15913 parser_lex(parser);
15914 return parser->previous;
15915 case PM_CASE_OPERATOR:
15916 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15917 parser_lex(parser);
15918 return parser->previous;
15919 default:
15920 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
15921 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
15922 }
15923}
15924
15925static void
15926parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
15927 // Get a reference to the string struct that is being held by the string
15928 // node. This is the value we're going to actually manipulate.
15929 pm_string_ensure_owned(string);
15930
15931 // Now get the bounds of the existing string. We'll use this as a
15932 // destination to move bytes into. We'll also use it for bounds checking
15933 // since we don't require that these strings be null terminated.
15934 size_t dest_length = pm_string_length(string);
15935 const uint8_t *source_cursor = (uint8_t *) string->source;
15936 const uint8_t *source_end = source_cursor + dest_length;
15937
15938 // We're going to move bytes backward in the string when we get leading
15939 // whitespace, so we'll maintain a pointer to the current position in the
15940 // string that we're writing to.
15941 size_t trimmed_whitespace = 0;
15942
15943 // While we haven't reached the amount of common whitespace that we need to
15944 // trim and we haven't reached the end of the string, we'll keep trimming
15945 // whitespace. Trimming in this context means skipping over these bytes such
15946 // that they aren't copied into the new string.
15947 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15948 if (*source_cursor == '\t') {
15949 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15950 if (trimmed_whitespace > common_whitespace) break;
15951 } else {
15952 trimmed_whitespace++;
15953 }
15954
15955 source_cursor++;
15956 dest_length--;
15957 }
15958
15959 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
15960 string->length = dest_length;
15961}
15962
15967static inline bool
15968heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
15969 if (string_node->unescaped.length == 0) {
15970 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
15971 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
15972 }
15973 return false;
15974}
15975
15979static void
15980parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
15981 // The next node should be dedented if it's the first node in the list or if
15982 // it follows a string node.
15983 bool dedent_next = true;
15984
15985 // Iterate over all nodes, and trim whitespace accordingly. We're going to
15986 // keep around two indices: a read and a write.
15987 size_t write_index = 0;
15988
15989 pm_node_t *node;
15990 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
15991 // We're not manipulating child nodes that aren't strings. In this case
15992 // we'll skip past it and indicate that the subsequent node should not
15993 // be dedented.
15994 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15995 nodes->nodes[write_index++] = node;
15996 dedent_next = false;
15997 continue;
15998 }
15999
16000 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16001 if (dedent_next) {
16002 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16003 }
16004
16005 if (heredoc_dedent_discard_string_node(parser, string_node)) {
16006 pm_node_destroy(parser, node);
16007 } else {
16008 nodes->nodes[write_index++] = node;
16009 }
16010
16011 // We always dedent the next node if it follows a string node.
16012 dedent_next = true;
16013 }
16014
16015 nodes->size = write_index;
16016}
16017
16021static pm_token_t
16022parse_strings_empty_content(const uint8_t *location) {
16023 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16024}
16025
16029static inline pm_node_t *
16030parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16031 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16032 bool concating = false;
16033
16034 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16035 pm_node_t *node = NULL;
16036
16037 // Here we have found a string literal. We'll parse it and add it to
16038 // the list of strings.
16039 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16040 assert(lex_mode->mode == PM_LEX_STRING);
16041 bool lex_interpolation = lex_mode->as.string.interpolation;
16042 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16043
16044 pm_token_t opening = parser->current;
16045 parser_lex(parser);
16046
16047 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16048 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16049 // If we get here, then we have an end immediately after a
16050 // start. In that case we'll create an empty content token and
16051 // return an uninterpolated string.
16052 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16053 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16054
16055 pm_string_shared_init(&string->unescaped, content.start, content.end);
16056 node = UP(string);
16057 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16058 // If we get here, then we have an end of a label immediately
16059 // after a start. In that case we'll create an empty symbol
16060 // node.
16061 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
16062 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
16063 node = UP(symbol);
16064
16065 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16066 } else if (!lex_interpolation) {
16067 // If we don't accept interpolation then we expect the string to
16068 // start with a single string content node.
16069 pm_string_t unescaped;
16070 pm_token_t content;
16071
16072 if (match1(parser, PM_TOKEN_EOF)) {
16073 unescaped = PM_STRING_EMPTY;
16074 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
16075 } else {
16076 unescaped = parser->current_string;
16077 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16078 content = parser->previous;
16079 }
16080
16081 // It is unfortunately possible to have multiple string content
16082 // nodes in a row in the case that there's heredoc content in
16083 // the middle of the string, like this cursed example:
16084 //
16085 // <<-END+'b
16086 // a
16087 // END
16088 // c'+'d'
16089 //
16090 // In that case we need to switch to an interpolated string to
16091 // be able to contain all of the parts.
16092 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16093 pm_node_list_t parts = { 0 };
16094 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16095 pm_node_list_append(&parts, part);
16096
16097 do {
16098 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
16099 pm_node_list_append(&parts, part);
16100 parser_lex(parser);
16101 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16102
16103 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16104 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16105
16106 pm_node_list_free(&parts);
16107 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16108 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16109 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16110 } else if (match1(parser, PM_TOKEN_EOF)) {
16111 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16112 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16113 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16114 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16115 } else {
16116 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16117 parser->previous.start = parser->previous.end;
16118 parser->previous.type = 0;
16119 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16120 }
16121 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16122 // In this case we've hit string content so we know the string
16123 // at least has something in it. We'll need to check if the
16124 // following token is the end (in which case we can return a
16125 // plain string) or if it's not then it has interpolation.
16126 pm_token_t content = parser->current;
16127 pm_string_t unescaped = parser->current_string;
16128 parser_lex(parser);
16129
16130 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16131 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16132 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16133
16134 // Kind of odd behavior, but basically if we have an
16135 // unterminated string and it ends in a newline, we back up one
16136 // character so that the error message is on the last line of
16137 // content in the string.
16138 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16139 const uint8_t *location = parser->previous.end;
16140 if (location > parser->start && location[-1] == '\n') location--;
16141 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
16142
16143 parser->previous.start = parser->previous.end;
16144 parser->previous.type = 0;
16145 }
16146 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16147 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16148 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16149 } else {
16150 // If we get here, then we have interpolation so we'll need
16151 // to create a string or symbol node with interpolation.
16152 pm_node_list_t parts = { 0 };
16153 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
16154 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16155 pm_node_list_append(&parts, part);
16156
16157 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16158 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16159 pm_node_list_append(&parts, part);
16160 }
16161 }
16162
16163 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16164 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16165 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16166 } else if (match1(parser, PM_TOKEN_EOF)) {
16167 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16168 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16169 } else {
16170 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16171 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16172 }
16173
16174 pm_node_list_free(&parts);
16175 }
16176 } else {
16177 // If we get here, then the first part of the string is not plain
16178 // string content, in which case we need to parse the string as an
16179 // interpolated string.
16180 pm_node_list_t parts = { 0 };
16181 pm_node_t *part;
16182
16183 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16184 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16185 pm_node_list_append(&parts, part);
16186 }
16187 }
16188
16189 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16190 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16191 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16192 } else if (match1(parser, PM_TOKEN_EOF)) {
16193 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16194 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16195 } else {
16196 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16197 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16198 }
16199
16200 pm_node_list_free(&parts);
16201 }
16202
16203 if (current == NULL) {
16204 // If the node we just parsed is a symbol node, then we can't
16205 // concatenate it with anything else, so we can now return that
16206 // node.
16207 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16208 return node;
16209 }
16210
16211 // If we don't already have a node, then it's fine and we can just
16212 // set the result to be the node we just parsed.
16213 current = node;
16214 } else {
16215 // Otherwise we need to check the type of the node we just parsed.
16216 // If it cannot be concatenated with the previous node, then we'll
16217 // need to add a syntax error.
16218 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16219 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16220 }
16221
16222 // If we haven't already created our container for concatenation,
16223 // we'll do that now.
16224 if (!concating) {
16225 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16226 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16227 }
16228
16229 concating = true;
16230 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16231 pm_interpolated_string_node_append(container, current);
16232 current = UP(container);
16233 }
16234
16235 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16236 }
16237 }
16238
16239 return current;
16240}
16241
// Flag values passed in the `flags` argument of parse_pattern. TOP and MULTI
// are distinct bits and are combined with a bitwise OR by callers; SINGLE is
// zero, i.e. neither bit set.
16242#define PM_PARSE_PATTERN_SINGLE 0
16243#define PM_PARSE_PATTERN_TOP 1
16244#define PM_PARSE_PATTERN_MULTI 2
16245
// Forward declaration: the pattern helpers defined below call parse_pattern
// before its definition appears.
16246static pm_node_t *
16247parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16248
16254static void
16255parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16256 // Skip this capture if it starts with an underscore.
16257 if (peek_at(parser, parser->start + location->start) == '_') return;
16258
16259 if (pm_constant_id_list_includes(captures, capture)) {
16260 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16261 } else {
16262 pm_constant_id_list_append(captures, capture);
16263 }
16264}
16265
16269static pm_node_t *
16270parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16271 // Now, if there are any :: operators that follow, parse them as constant
16272 // path nodes.
16273 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16274 pm_token_t delimiter = parser->previous;
16275 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16276 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16277 }
16278
16279 // If there is a [ or ( that follows, then this is part of a larger pattern
16280 // expression. We'll parse the inner pattern here, then modify the returned
16281 // inner pattern with our constant path attached.
16282 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16283 return node;
16284 }
16285
16286 pm_token_t opening;
16287 pm_token_t closing;
16288 pm_node_t *inner = NULL;
16289
16290 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16291 opening = parser->previous;
16292 accept1(parser, PM_TOKEN_NEWLINE);
16293
16294 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16295 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16296 accept1(parser, PM_TOKEN_NEWLINE);
16297 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16298 }
16299
16300 closing = parser->previous;
16301 } else {
16302 parser_lex(parser);
16303 opening = parser->previous;
16304 accept1(parser, PM_TOKEN_NEWLINE);
16305
16306 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16307 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16308 accept1(parser, PM_TOKEN_NEWLINE);
16309 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16310 }
16311
16312 closing = parser->previous;
16313 }
16314
16315 if (!inner) {
16316 // If there was no inner pattern, then we have something like Foo() or
16317 // Foo[]. In that case we'll create an array pattern with no requireds.
16318 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16319 }
16320
16321 // Now that we have the inner pattern, check to see if it's an array, find,
16322 // or hash pattern. If it is, then we'll attach our constant path to it if
16323 // it doesn't already have a constant. If it's not one of those node types
16324 // or it does have a constant, then we'll create an array pattern.
16325 switch (PM_NODE_TYPE(inner)) {
16326 case PM_ARRAY_PATTERN_NODE: {
16327 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16328
16329 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16330 PM_NODE_START_SET_NODE(pattern_node, node);
16331 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16332
16333 pattern_node->constant = node;
16334 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16335 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16336
16337 return UP(pattern_node);
16338 }
16339
16340 break;
16341 }
16342 case PM_FIND_PATTERN_NODE: {
16343 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16344
16345 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16346 PM_NODE_START_SET_NODE(pattern_node, node);
16347 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16348
16349 pattern_node->constant = node;
16350 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16351 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16352
16353 return UP(pattern_node);
16354 }
16355
16356 break;
16357 }
16358 case PM_HASH_PATTERN_NODE: {
16359 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16360
16361 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16362 PM_NODE_START_SET_NODE(pattern_node, node);
16363 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16364
16365 pattern_node->constant = node;
16366 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16367 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16368
16369 return UP(pattern_node);
16370 }
16371
16372 break;
16373 }
16374 default:
16375 break;
16376 }
16377
16378 // If we got here, then we didn't return one of the inner patterns by
16379 // attaching its constant. In this case we'll create an array pattern and
16380 // attach our constant to it.
16381 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16382 pm_array_pattern_node_requireds_append(pattern_node, inner);
16383 return UP(pattern_node);
16384}
16385
16389static pm_splat_node_t *
16390parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16391 assert(parser->previous.type == PM_TOKEN_USTAR);
16392 pm_token_t operator = parser->previous;
16393 pm_node_t *name = NULL;
16394
16395 // Rest patterns don't necessarily have a name associated with them. So we
16396 // will check for that here. If they do, then we'll add it to the local
16397 // table since this pattern will cause it to become a local variable.
16398 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16399 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16400
16401 int depth;
16402 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16403 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16404 }
16405
16406 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16407 name = UP(pm_local_variable_target_node_create(
16408 parser,
16409 &TOK2LOC(parser, &parser->previous),
16410 constant_id,
16411 (uint32_t) (depth == -1 ? 0 : depth)
16412 ));
16413 }
16414
16415 // Finally we can return the created node.
16416 return pm_splat_node_create(parser, &operator, name);
16417}
16418
16422static pm_node_t *
16423parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16424 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16425 parser_lex(parser);
16426
16427 pm_token_t operator = parser->previous;
16428 pm_node_t *value = NULL;
16429
16430 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16431 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16432 }
16433
16434 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16435 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16436
16437 int depth;
16438 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16439 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16440 }
16441
16442 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16443 value = UP(pm_local_variable_target_node_create(
16444 parser,
16445 &TOK2LOC(parser, &parser->previous),
16446 constant_id,
16447 (uint32_t) (depth == -1 ? 0 : depth)
16448 ));
16449 }
16450
16451 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16452}
16453
16458static bool
16459pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16460 ptrdiff_t length = end - start;
16461 if (length == 0) return false;
16462
16463 // First ensure that it starts with a valid identifier starting character.
16464 size_t width = char_is_identifier_start(parser, start, end - start);
16465 if (width == 0) return false;
16466
16467 // Next, ensure that it's not an uppercase character.
16468 if (parser->encoding_changed) {
16469 if (parser->encoding->isupper_char(start, length)) return false;
16470 } else {
16471 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16472 }
16473
16474 // Next, iterate through all of the bytes of the string to ensure that they
16475 // are all valid identifier characters.
16476 const uint8_t *cursor = start + width;
16477 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16478 return cursor == end;
16479}
16480
16485static pm_node_t *
16486parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16487 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16488 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16489 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16490
16491 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16492 int depth = -1;
16493
16494 if (pm_slice_is_valid_local(parser, start, end)) {
16495 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16496 } else {
16497 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16498
16499 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16500 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16501 }
16502 }
16503
16504 if (depth == -1) {
16505 pm_parser_local_add(parser, constant_id, start, end, 0);
16506 }
16507
16508 parse_pattern_capture(parser, captures, constant_id, value_loc);
16509 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16510 parser,
16511 value_loc,
16512 constant_id,
16513 (uint32_t) (depth == -1 ? 0 : depth)
16514 );
16515
16516 return UP(pm_implicit_node_create(parser, UP(target)));
16517}
16518
16523static void
16524parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16525 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16526 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16527 }
16528}
16529
16534parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16535 pm_node_list_t assocs = { 0 };
16536 pm_static_literals_t keys = { 0 };
16537 pm_node_t *rest = NULL;
16538
16539 switch (PM_NODE_TYPE(first_node)) {
16540 case PM_ASSOC_SPLAT_NODE:
16541 case PM_NO_KEYWORDS_PARAMETER_NODE:
16542 rest = first_node;
16543 break;
16544 case PM_SYMBOL_NODE: {
16545 if (pm_symbol_node_label_p(parser, first_node)) {
16546 parse_pattern_hash_key(parser, &keys, first_node);
16547 pm_node_t *value;
16548
16549 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16550 // Otherwise, we will create an implicit local variable
16551 // target for the value.
16552 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16553 } else {
16554 // Here we have a value for the first assoc in the list, so
16555 // we will parse it now.
16556 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16557 }
16558
16559 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16560 pm_node_list_append(&assocs, assoc);
16561 break;
16562 }
16563 }
16565 default: {
16566 // If we get anything else, then this is an error. For this we'll
16567 // create a missing node for the value and create an assoc node for
16568 // the first node in the list.
16569 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16570 pm_parser_err_node(parser, first_node, diag_id);
16571
16572 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16573 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16574
16575 pm_node_list_append(&assocs, assoc);
16576 break;
16577 }
16578 }
16579
16580 // If there are any other assocs, then we'll parse them now.
16581 while (accept1(parser, PM_TOKEN_COMMA)) {
16582 // Here we need to break to support trailing commas.
16583 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16584 // Trailing commas are not allowed to follow a rest pattern.
16585 if (rest != NULL) {
16586 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16587 }
16588
16589 break;
16590 }
16591
16592 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16593 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16594
16595 if (rest == NULL) {
16596 rest = assoc;
16597 } else {
16598 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16599 pm_node_list_append(&assocs, assoc);
16600 }
16601 } else {
16602 pm_node_t *key;
16603
16604 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16605 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16606
16607 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16608 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16609 } else if (!pm_symbol_node_label_p(parser, key)) {
16610 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16611 }
16612 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16613 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16614 } else {
16615 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16616
16617 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16618 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16619 }
16620
16621 parse_pattern_hash_key(parser, &keys, key);
16622 pm_node_t *value = NULL;
16623
16624 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16625 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16626 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16627 } else {
16628 value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0));
16629 }
16630 } else {
16631 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16632 }
16633
16634 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16635
16636 if (rest != NULL) {
16637 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16638 }
16639
16640 pm_node_list_append(&assocs, assoc);
16641 }
16642 }
16643
16644 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16645 xfree_sized(assocs.nodes, assocs.capacity * sizeof(pm_node_t *));
16646
16647 pm_static_literals_free(&keys);
16648 return node;
16649}
16650
16654static pm_node_t *
16655parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16656 switch (parser->current.type) {
16657 case PM_TOKEN_IDENTIFIER:
16658 case PM_TOKEN_METHOD_NAME: {
16659 parser_lex(parser);
16660 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16661
16662 int depth;
16663 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16664 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16665 }
16666
16667 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16668 return UP(pm_local_variable_target_node_create(
16669 parser,
16670 &TOK2LOC(parser, &parser->previous),
16671 constant_id,
16672 (uint32_t) (depth == -1 ? 0 : depth)
16673 ));
16674 }
16675 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16676 pm_token_t opening = parser->current;
16677 parser_lex(parser);
16678
16679 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16680 // If we have an empty array pattern, then we'll just return a new
16681 // array pattern node.
16682 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16683 }
16684
16685 // Otherwise, we'll parse the inner pattern, then deal with it depending
16686 // on the type it returns.
16687 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16688
16689 accept1(parser, PM_TOKEN_NEWLINE);
16690 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16691 pm_token_t closing = parser->previous;
16692
16693 switch (PM_NODE_TYPE(inner)) {
16694 case PM_ARRAY_PATTERN_NODE: {
16695 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16696 if (pattern_node->opening_loc.length == 0) {
16697 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16698 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16699
16700 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16701 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16702
16703 return UP(pattern_node);
16704 }
16705
16706 break;
16707 }
16708 case PM_FIND_PATTERN_NODE: {
16709 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16710 if (pattern_node->opening_loc.length == 0) {
16711 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16712 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16713
16714 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16715 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16716
16717 return UP(pattern_node);
16718 }
16719
16720 break;
16721 }
16722 default:
16723 break;
16724 }
16725
16726 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16727 pm_array_pattern_node_requireds_append(node, inner);
16728 return UP(node);
16729 }
16730 case PM_TOKEN_BRACE_LEFT: {
16731 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16732 parser->pattern_matching_newlines = false;
16733
16735 pm_token_t opening = parser->current;
16736 parser_lex(parser);
16737
16738 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16739 // If we have an empty hash pattern, then we'll just return a new hash
16740 // pattern node.
16741 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16742 } else {
16743 pm_node_t *first_node;
16744
16745 switch (parser->current.type) {
16746 case PM_TOKEN_LABEL:
16747 parser_lex(parser);
16748 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16749 break;
16750 case PM_TOKEN_USTAR_STAR:
16751 first_node = parse_pattern_keyword_rest(parser, captures);
16752 break;
16753 case PM_TOKEN_STRING_BEGIN:
16754 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16755 break;
16756 default: {
16757 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
16758 parser_lex(parser);
16759
16760 first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
16761 break;
16762 }
16763 }
16764
16765 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16766
16767 accept1(parser, PM_TOKEN_NEWLINE);
16768 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16769 pm_token_t closing = parser->previous;
16770
16771 PM_NODE_START_SET_TOKEN(parser, node, &opening);
16772 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
16773
16774 node->opening_loc = TOK2LOC(parser, &opening);
16775 node->closing_loc = TOK2LOC(parser, &closing);
16776 }
16777
16778 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16779 return UP(node);
16780 }
16781 case PM_TOKEN_UDOT_DOT:
16782 case PM_TOKEN_UDOT_DOT_DOT: {
16783 pm_token_t operator = parser->current;
16784 parser_lex(parser);
16785
16786 // Since we have a unary range operator, we need to parse the subsequent
16787 // expression as the right side of the range.
16788 switch (parser->current.type) {
16789 case PM_CASE_PRIMITIVE: {
16790 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16791 return UP(pm_range_node_create(parser, NULL, &operator, right));
16792 }
16793 default: {
16794 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16795 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16796 return UP(pm_range_node_create(parser, NULL, &operator, right));
16797 }
16798 }
16799 }
16800 case PM_CASE_PRIMITIVE: {
16801 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
16802
16803 // If we found a label, we need to immediately return to the caller.
16804 if (pm_symbol_node_label_p(parser, node)) return node;
16805
16806 // Call nodes (arithmetic operations) are not allowed in patterns
16807 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16808 pm_parser_err_node(parser, node, diag_id);
16809 pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node));
16810
16811 pm_node_unreference(parser, node);
16812 pm_node_destroy(parser, node);
16813 return UP(missing_node);
16814 }
16815
16816 // Now that we have a primitive, we need to check if it's part of a range.
16817 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16818 pm_token_t operator = parser->previous;
16819
16820 // Now that we have the operator, we need to check if this is followed
16821 // by another expression. If it is, then we will create a full range
16822 // node. Otherwise, we'll create an endless range.
16823 switch (parser->current.type) {
16824 case PM_CASE_PRIMITIVE: {
16825 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16826 return UP(pm_range_node_create(parser, node, &operator, right));
16827 }
16828 default:
16829 return UP(pm_range_node_create(parser, node, &operator, NULL));
16830 }
16831 }
16832
16833 return node;
16834 }
16835 case PM_TOKEN_CARET: {
16836 parser_lex(parser);
16837 pm_token_t operator = parser->previous;
16838
16839 // At this point we have a pin operator. We need to check the subsequent
16840 // expression to determine if it's a variable or an expression.
16841 switch (parser->current.type) {
16842 case PM_TOKEN_IDENTIFIER: {
16843 parser_lex(parser);
16844 pm_node_t *variable = UP(parse_variable(parser));
16845
16846 if (variable == NULL) {
16847 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
16848 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
16849 }
16850
16851 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16852 }
16853 case PM_TOKEN_INSTANCE_VARIABLE: {
16854 parser_lex(parser);
16855 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
16856
16857 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16858 }
16859 case PM_TOKEN_CLASS_VARIABLE: {
16860 parser_lex(parser);
16861 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
16862
16863 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16864 }
16865 case PM_TOKEN_GLOBAL_VARIABLE: {
16866 parser_lex(parser);
16867 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
16868
16869 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16870 }
16871 case PM_TOKEN_NUMBERED_REFERENCE: {
16872 parser_lex(parser);
16873 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16874
16875 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16876 }
16877 case PM_TOKEN_BACK_REFERENCE: {
16878 parser_lex(parser);
16879 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
16880
16881 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16882 }
16883 case PM_TOKEN_PARENTHESIS_LEFT: {
16884 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16885 parser->pattern_matching_newlines = false;
16886
16887 pm_token_t lparen = parser->current;
16888 parser_lex(parser);
16889
16890 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16891 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
16892
16893 accept1(parser, PM_TOKEN_NEWLINE);
16894 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16895 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
16896 }
16897 default: {
16898 // If we get here, then we have a pin operator followed by something
16899 // not understood. We'll create a missing node and return that.
16900 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16901 pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
16902 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
16903 }
16904 }
16905 }
16906 case PM_TOKEN_UCOLON_COLON: {
16907 pm_token_t delimiter = parser->current;
16908 parser_lex(parser);
16909
16910 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16911 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16912
16913 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16914 }
16915 case PM_TOKEN_CONSTANT: {
16916 pm_token_t constant = parser->current;
16917 parser_lex(parser);
16918
16919 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16920 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16921 }
16922 default:
16923 pm_parser_err_current(parser, diag_id);
16924 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16925 }
16926}
16927
16928static bool
16929parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
16930 switch (PM_NODE_TYPE(node)) {
16931 case PM_LOCAL_VARIABLE_TARGET_NODE: {
16932 pm_parser_t *parser = (pm_parser_t *) data;
16933 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16934 return false;
16935 }
16936 default:
16937 return true;
16938 }
16939}
16940
16945static void
16946parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
16947 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16948}
16949
16954static pm_node_t *
16955parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
16956 pm_node_t *node = first_node;
16957 bool alternation = false;
16958
16959 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16960 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
16961 parse_pattern_alternation_error(parser, node);
16962 }
16963
16964 switch (parser->current.type) {
16965 case PM_TOKEN_IDENTIFIER:
16966 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16967 case PM_TOKEN_BRACE_LEFT:
16968 case PM_TOKEN_CARET:
16969 case PM_TOKEN_CONSTANT:
16970 case PM_TOKEN_UCOLON_COLON:
16971 case PM_TOKEN_UDOT_DOT:
16972 case PM_TOKEN_UDOT_DOT_DOT:
16973 case PM_CASE_PRIMITIVE: {
16974 if (!alternation) {
16975 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16976 } else {
16977 pm_token_t operator = parser->previous;
16978 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16979
16980 if (captures->size) parse_pattern_alternation_error(parser, right);
16981 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
16982 }
16983
16984 break;
16985 }
16986 case PM_TOKEN_PARENTHESIS_LEFT:
16987 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16988 pm_token_t operator = parser->previous;
16989 pm_token_t opening = parser->current;
16990 parser_lex(parser);
16991
16992 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16993 accept1(parser, PM_TOKEN_NEWLINE);
16994 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16995 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
16996
16997 if (!alternation) {
16998 node = right;
16999 } else {
17000 if (captures->size) parse_pattern_alternation_error(parser, right);
17001 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17002 }
17003
17004 break;
17005 }
17006 default: {
17007 pm_parser_err_current(parser, diag_id);
17008 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17009
17010 if (!alternation) {
17011 node = right;
17012 } else {
17013 if (captures->size) parse_pattern_alternation_error(parser, right);
17014 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
17015 }
17016
17017 break;
17018 }
17019 }
17020 }
17021
17022 // If we have an =>, then we are assigning this pattern to a variable.
17023 // In this case we should create an assignment node.
17024 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17025 pm_token_t operator = parser->previous;
17026 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17027
17028 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17029 int depth;
17030
17031 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17032 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17033 }
17034
17035 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
17036 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17037 parser,
17038 &TOK2LOC(parser, &parser->previous),
17039 constant_id,
17040 (uint32_t) (depth == -1 ? 0 : depth)
17041 );
17042
17043 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
17044 }
17045
17046 return node;
17047}
17048
/**
 * Parse a pattern. Depending on the current token this yields an implicit
 * hash pattern (label, `**`, or a string that turns out to be a label), a
 * leading rest, or one or more primitives. When PM_PARSE_PATTERN_MULTI is set
 * in flags and a comma follows, the comma-separated patterns are gathered
 * into an array pattern or a find pattern.
 *
 * In the three switch cases that return an implicit hash pattern directly,
 * PM_ERR_PATTERN_HASH_IMPLICIT is reported unless PM_PARSE_PATTERN_TOP is set
 * in flags.
 *
 * diag_id is the diagnostic forwarded to the primitive parsers for the first
 * pattern; depth is the current recursion depth.
 */
17052static pm_node_t *
17053parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17054 pm_node_t *node = NULL;
17055
      // leading_rest: the pattern started with a `*` rest.
      // trailing_rest: a rest (explicit or implicit) was seen after a comma.
17056 bool leading_rest = false;
17057 bool trailing_rest = false;
17058
17059 switch (parser->current.type) {
17060 case PM_TOKEN_LABEL: {
17061 parser_lex(parser);
17062 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
17063 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
17064
17065 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17066 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17067 }
17068
17069 return node;
17070 }
17071 case PM_TOKEN_USTAR_STAR: {
17072 node = parse_pattern_keyword_rest(parser, captures);
17073 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17074
17075 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17076 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17077 }
17078
17079 return node;
17080 }
17081 case PM_TOKEN_STRING_BEGIN: {
17082 // We need special handling for string beginnings because they could
17083 // be dynamic symbols leading to hash patterns.
17084 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17085
17086 if (pm_symbol_node_label_p(parser, node)) {
17087 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17088
17089 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17090 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17091 }
17092
17093 return node;
17094 }
17095
      // Not a label: continue with any alternation or capture that
      // follows the primitive we already parsed.
17096 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17097 break;
17098 }
17099 case PM_TOKEN_USTAR: {
17100 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17101 parser_lex(parser);
17102 node = UP(parse_pattern_rest(parser, captures));
17103 leading_rest = true;
17104 break;
17105 }
17106 }
      // Intentional fallthrough: when neither TOP nor MULTI is set, the `*`
      // is left unconsumed and handled by the default case below.
17108 default:
17109 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17110 break;
17111 }
17112
17113 // If we got a dynamic label symbol, then we need to treat it like the
17114 // beginning of a hash pattern.
17115 if (pm_symbol_node_label_p(parser, node)) {
17116 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17117 }
17118
17119 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17120 // If we have a comma, then we are now parsing either an array pattern
17121 // or a find pattern. We need to parse all of the patterns, put them
17122 // into a big list, and then determine which type of node we have.
17123 pm_node_list_t nodes = { 0 };
17124 pm_node_list_append(&nodes, node);
17125
17126 // Gather up all of the patterns into the list.
17127 while (accept1(parser, PM_TOKEN_COMMA)) {
17128 // Break early here in case we have a trailing comma.
17129 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
      // A trailing comma is recorded as an implicit rest node built
      // from the comma token itself.
17130 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17131 pm_node_list_append(&nodes, node);
17132 trailing_rest = true;
17133 break;
17134 }
17135
17136 if (accept1(parser, PM_TOKEN_USTAR)) {
17137 node = UP(parse_pattern_rest(parser, captures));
17138
17139 // If we have already parsed a splat pattern, then this is an
17140 // error. We will continue to parse the rest of the patterns,
17141 // but we will indicate it as an error.
17142 if (trailing_rest) {
17143 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17144 }
17145
17146 trailing_rest = true;
17147 } else {
17148 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17149 }
17150
17151 pm_node_list_append(&nodes, node);
17152 }
17153
17154 // If the first pattern and the last pattern are rest patterns, then we
17155 // will call this a find pattern, regardless of how many rest patterns
17156 // are in between because we know we already added the appropriate
17157 // errors. Otherwise we will create an array pattern.
17158 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17159 node = UP(pm_find_pattern_node_create(parser, &nodes));
17160
      // Only the two rests were given, with nothing in between.
17161 if (nodes.size == 2) {
17162 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17163 }
17164 } else {
17165 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17166
17167 if (leading_rest && trailing_rest) {
17168 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17169 }
17170 }
17171
      // The temporary list storage is released once the pattern node has
      // been created from it.
17172 xfree_sized(nodes.nodes, nodes.capacity * sizeof(pm_node_t *));
17173 } else if (leading_rest) {
17174 // Otherwise, if we parsed a single splat pattern, then we know we have
17175 // an array pattern, so we can go ahead and create that node.
17176 node = UP(pm_array_pattern_node_rest_create(parser, node));
17177 }
17178
17179 return node;
17180}
17181
17187static inline void
17188parse_negative_numeric(pm_node_t *node) {
17189 switch (PM_NODE_TYPE(node)) {
17190 case PM_INTEGER_NODE: {
17191 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17192 cast->base.location.start--;
17193 cast->base.location.length++;
17194 cast->value.negative = true;
17195 break;
17196 }
17197 case PM_FLOAT_NODE: {
17198 pm_float_node_t *cast = (pm_float_node_t *) node;
17199 cast->base.location.start--;
17200 cast->base.location.length++;
17201 cast->value = -cast->value;
17202 break;
17203 }
17204 case PM_RATIONAL_NODE: {
17205 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17206 cast->base.location.start--;
17207 cast->base.location.length++;
17208 cast->numerator.negative = true;
17209 break;
17210 }
17211 case PM_IMAGINARY_NODE:
17212 node->location.start--;
17213 node->location.length++;
17214 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17215 break;
17216 default:
17217 assert(false && "unreachable");
17218 break;
17219 }
17220}
17221
17227static void
17228pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17229 switch (diag_id) {
17230 case PM_ERR_HASH_KEY: {
17231 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17232 break;
17233 }
17234 case PM_ERR_HASH_VALUE:
17235 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17236 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17237 break;
17238 }
17239 case PM_ERR_UNARY_RECEIVER: {
17240 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17241 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17242 break;
17243 }
17244 case PM_ERR_UNARY_DISALLOWED:
17245 case PM_ERR_EXPECT_ARGUMENT: {
17246 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type));
17247 break;
17248 }
17249 default:
17250 pm_parser_err_previous(parser, diag_id);
17251 break;
17252 }
17253}
17254
/**
 * Check that a `retry` node appears in a context where it is allowed by
 * walking the parser's context stack from the innermost context outwards.
 * The walk stops without an error at the first context that permits a retry.
 * It stops with an error at the first context that forbids one; the error
 * chosen depends on whether an else or ensure context was crossed on the way
 * (tracked in `context`).
 *
 * NOTE(review): several `case` labels of the switch below are not visible in
 * this listing (the statements that set CONTEXT_THROUGH_ELSE and
 * CONTEXT_THROUGH_ENSURE have no visible labels). They are reproduced exactly
 * as shown.
 */
17258static void
17259parse_retry(pm_parser_t *parser, const pm_node_t *node) {
      // Function-local states for `context`; undefined again at the end of
      // the function.
17260#define CONTEXT_NONE 0
17261#define CONTEXT_THROUGH_ENSURE 1
17262#define CONTEXT_THROUGH_ELSE 2
17263
17264 pm_context_node_t *context_node = parser->current_context;
17265 int context = CONTEXT_NONE;
17266
17267 while (context_node != NULL) {
17268 switch (context_node->context) {
17276 case PM_CONTEXT_DEFINED:
17278 // These are the good cases. We're allowed to have a retry here.
17279 return;
17280 case PM_CONTEXT_CLASS:
17281 case PM_CONTEXT_DEF:
17283 case PM_CONTEXT_MAIN:
17284 case PM_CONTEXT_MODULE:
17285 case PM_CONTEXT_PREEXE:
17286 case PM_CONTEXT_SCLASS:
17287 // These are the bad cases. We're not allowed to have a retry in
17288 // these contexts.
17289 if (context == CONTEXT_NONE) {
17290 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17291 } else if (context == CONTEXT_THROUGH_ENSURE) {
17292 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17293 } else if (context == CONTEXT_THROUGH_ELSE) {
17294 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17295 }
17296 return;
17304 // These are also bad cases, but with a more specific error
17305 // message indicating the else.
17306 context = CONTEXT_THROUGH_ELSE;
17307 break;
17315 // These are also bad cases, but with a more specific error
17316 // message indicating the ensure.
17317 context = CONTEXT_THROUGH_ENSURE;
17318 break;
17319 case PM_CONTEXT_NONE:
17320 // This case should never happen.
17321 assert(false && "unreachable");
17322 break;
17323 case PM_CONTEXT_BEGIN:
17327 case PM_CONTEXT_CASE_IN:
17330 case PM_CONTEXT_ELSE:
17331 case PM_CONTEXT_ELSIF:
17332 case PM_CONTEXT_EMBEXPR:
17334 case PM_CONTEXT_FOR:
17335 case PM_CONTEXT_IF:
17340 case PM_CONTEXT_PARENS:
17341 case PM_CONTEXT_POSTEXE:
17343 case PM_CONTEXT_TERNARY:
17344 case PM_CONTEXT_UNLESS:
17345 case PM_CONTEXT_UNTIL:
17346 case PM_CONTEXT_WHILE:
17347 // In these contexts we should continue walking up the list of
17348 // contexts.
17349 break;
17350 }
17351
17352 context_node = context_node->prev;
17353 }
17354
      // Undefine exactly the names defined at the top of the function so that
      // none of them stays defined after it.
17355#undef CONTEXT_NONE
17356#undef CONTEXT_THROUGH_ENSURE
17357#undef CONTEXT_THROUGH_ELSE
17358}
17359
/**
 * Check that a `yield` node appears in a context where it is allowed by
 * walking the parser's context stack from the innermost context outwards.
 * The walk stops without an error at the first context that permits a yield,
 * and stops with PM_ERR_INVALID_YIELD at the first context that forbids one.
 *
 * NOTE(review): several `case` labels of the switch below are not visible in
 * this listing.
 */
17363static void
17364parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17365 pm_context_node_t *context_node = parser->current_context;
17366
17367 while (context_node != NULL) {
17368 switch (context_node->context) {
17369 case PM_CONTEXT_DEF:
17371 case PM_CONTEXT_DEFINED:
17375 // These are the good cases. We're allowed to have a yield in
17376 // these contexts.
17377 return;
17378 case PM_CONTEXT_CLASS:
17382 case PM_CONTEXT_MAIN:
17383 case PM_CONTEXT_MODULE:
17387 case PM_CONTEXT_SCLASS:
17391 // These are the bad cases. We're not allowed to have a yield in
17392 // these contexts.
17393 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17394 return;
17395 case PM_CONTEXT_NONE:
17396 // This case should never happen.
17397 assert(false && "unreachable");
17398 break;
17399 case PM_CONTEXT_BEGIN:
17409 case PM_CONTEXT_CASE_IN:
17412 case PM_CONTEXT_ELSE:
17413 case PM_CONTEXT_ELSIF:
17414 case PM_CONTEXT_EMBEXPR:
17416 case PM_CONTEXT_FOR:
17417 case PM_CONTEXT_IF:
17425 case PM_CONTEXT_PARENS:
17426 case PM_CONTEXT_POSTEXE:
17428 case PM_CONTEXT_PREEXE:
17430 case PM_CONTEXT_TERNARY:
17431 case PM_CONTEXT_UNLESS:
17432 case PM_CONTEXT_UNTIL:
17433 case PM_CONTEXT_WHILE:
17434 // In these contexts we should continue walking up the list of
17435 // contexts.
17436 break;
17437 }
17438
17439 context_node = context_node->prev;
17440 }
17441}
17442
/**
 * Data handed to the regular expression error callback.
 *
 * NOTE(review): only part of this definition is visible in this listing; the
 * closing line with the type name and some members are not shown. The code
 * that uses the callback data also reads `parser` and `shared` members, which
 * presumably belong to this struct -- confirm.
 */
17447typedef struct {
17450
      // Start of the fallback source range used for the error location;
      // presumably the start of the regular expression node -- confirm.
17452 const uint8_t *start;
17453
      // End of the fallback source range used for the error location.
17455 const uint8_t *end;
17456
17465
/**
 * Error callback for regular expression parsing: adds a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic, formatted with the given message, to
 * the parser held in the callback data.
 *
 * When the callback data's `shared` flag is set, the error is located at the
 * [start, end) range passed to the callback. Otherwise it is located at the
 * range stored in the callback data.
 *
 * NOTE(review): `callback_data` is used below but its declaration is not
 * visible in this listing; presumably it is `data` cast to the callback data
 * struct -- confirm.
 */
17470static void
17471parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
      // Temporary token used only to carry the start/end pointers into the
      // PM_TOKEN_START / PM_TOKEN_LENGTH macros below; its type is unused.
17473 pm_token_t location;
17474
17475 if (callback_data->shared) {
17476 location = (pm_token_t) { .type = 0, .start = start, .end = end };
17477 } else {
17478 location = (pm_token_t) { .type = 0, .start = callback_data->start, .end = callback_data->end };
17479 }
17480
17481 PM_PARSER_ERR_FORMAT(callback_data->parser, PM_TOKEN_START(callback_data->parser, &location), PM_TOKEN_LENGTH(&location), PM_ERR_REGEXP_PARSE_ERROR, message);
17482}
17483
/**
 * Run the regular expression parser over the unescaped source of the given
 * regular expression node, with parse_regular_expression_error as the error
 * callback. The two NULL arguments passed to pm_regexp_parse are presumably
 * the optional name callback and its data -- confirm.
 *
 * The callback data records the parser, the node's source range, and whether
 * the unescaped string is of type PM_STRING_SHARED.
 *
 * NOTE(review): the line that declares `error_data` (the opening of the
 * initializer below) is not visible in this listing.
 */
17487static void
17488parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17489 const pm_string_t *unescaped = &node->unescaped;
17491 .parser = parser,
17492 .start = parser->start + PM_NODE_START(node),
17493 .end = parser->start + PM_NODE_END(node),
17494 .shared = unescaped->type == PM_STRING_SHARED
17495 };
17496
17497 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17498}
17499
17503static inline pm_node_t *
17504parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17505 switch (parser->current.type) {
17506 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17507 parser_lex(parser);
17508
17509 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17510 pm_accepts_block_stack_push(parser, true);
17511 bool parsed_bare_hash = false;
17512
17513 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17514 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17515
17516 // Handle the case where we don't have a comma and we have a
17517 // newline followed by a right bracket.
17518 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17519 break;
17520 }
17521
17522 // Ensure that we have a comma between elements in the array.
17523 if (array->elements.size > 0) {
17524 if (accept1(parser, PM_TOKEN_COMMA)) {
17525 // If there was a comma but we also accepted a newline,
17526 // then this is a syntax error.
17527 if (accepted_newline) {
17528 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17529 }
17530 } else {
17531 // If there was no comma, then we need to add a syntax
17532 // error.
17533 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
17534 parser->previous.start = parser->previous.end;
17535 parser->previous.type = 0;
17536 }
17537 }
17538
17539 // If we have a right bracket immediately following a comma,
17540 // this is allowed since it's a trailing comma. In this case we
17541 // can break out of the loop.
17542 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
17543
17544 pm_node_t *element;
17545
17546 if (accept1(parser, PM_TOKEN_USTAR)) {
17547 pm_token_t operator = parser->previous;
17548 pm_node_t *expression = NULL;
17549
17550 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17551 pm_parser_scope_forwarding_positionals_check(parser, &operator);
17552 } else {
17553 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17554 }
17555
17556 element = UP(pm_splat_node_create(parser, &operator, expression));
17557 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17558 if (parsed_bare_hash) {
17559 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17560 }
17561
17562 element = UP(pm_keyword_hash_node_create(parser));
17563 pm_static_literals_t hash_keys = { 0 };
17564
17565 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17566 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17567 }
17568
17569 pm_static_literals_free(&hash_keys);
17570 parsed_bare_hash = true;
17571 } else {
17572 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17573
17574 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17575 if (parsed_bare_hash) {
17576 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17577 }
17578
17579 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
17580 pm_static_literals_t hash_keys = { 0 };
17581 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17582
17583 pm_token_t operator = { 0 };
17584 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
17585 operator = parser->previous;
17586 }
17587
17588 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17589 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
17590 pm_keyword_hash_node_elements_append(hash, assoc);
17591
17592 element = UP(hash);
17593 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17594 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17595 }
17596
17597 pm_static_literals_free(&hash_keys);
17598 parsed_bare_hash = true;
17599 }
17600 }
17601
17602 pm_array_node_elements_append(array, element);
17603 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
17604 }
17605
17606 accept1(parser, PM_TOKEN_NEWLINE);
17607
17608 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17609 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
17610 parser->previous.start = parser->previous.end;
17611 parser->previous.type = 0;
17612 }
17613
17614 pm_array_node_close_set(parser, array, &parser->previous);
17615 pm_accepts_block_stack_pop(parser);
17616
17617 return UP(array);
17618 }
17619 case PM_TOKEN_PARENTHESIS_LEFT:
17620 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17621 pm_token_t opening = parser->current;
17622 pm_node_flags_t flags = 0;
17623
17624 pm_node_list_t current_block_exits = { 0 };
17625 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17626
17627 parser_lex(parser);
17628 while (true) {
17629 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17630 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17631 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17632 break;
17633 }
17634 }
17635
17636 // If this is the end of the file or we match a right parenthesis, then
17637 // we have an empty parentheses node, and we can immediately return.
17638 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17639 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17640
17641 pop_block_exits(parser, previous_block_exits);
17642 pm_node_list_free(&current_block_exits);
17643
17644 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
17645 }
17646
17647 // Otherwise, we're going to parse the first statement in the list
17648 // of statements within the parentheses.
17649 pm_accepts_block_stack_push(parser, true);
17650 context_push(parser, PM_CONTEXT_PARENS);
17651 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17652 context_pop(parser);
17653
17654 // Determine if this statement is followed by a terminator. In the
17655 // case of a single statement, this is fine. But in the case of
17656 // multiple statements it's required.
17657 bool terminator_found = false;
17658
17659 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17660 terminator_found = true;
17661 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17662 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17663 terminator_found = true;
17664 }
17665
17666 if (terminator_found) {
17667 while (true) {
17668 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17669 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17670 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17671 break;
17672 }
17673 }
17674 }
17675
17676 // If we hit a right parenthesis, then we're done parsing the
17677 // parentheses node, and we can check which kind of node we should
17678 // return.
17679 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17680 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17681 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17682 }
17683
17684 parser_lex(parser);
17685 pm_accepts_block_stack_pop(parser);
17686
17687 pop_block_exits(parser, previous_block_exits);
17688 pm_node_list_free(&current_block_exits);
17689
17690 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17691 // If we have a single statement and are ending on a right
17692 // parenthesis, then we need to check if this is possibly a
17693 // multiple target node.
17694 pm_multi_target_node_t *multi_target;
17695
17696 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
17697 multi_target = (pm_multi_target_node_t *) statement;
17698 } else {
17699 multi_target = pm_multi_target_node_create(parser);
17700 pm_multi_target_node_targets_append(parser, multi_target, statement);
17701 }
17702
17703 multi_target->lparen_loc = TOK2LOC(parser, &opening);
17704 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
17705 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
17706 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
17707
17708 pm_node_t *result;
17709 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17710 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17711 accept1(parser, PM_TOKEN_NEWLINE);
17712 } else {
17713 result = UP(multi_target);
17714 }
17715
17716 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
17717 // All set, this is explicitly allowed by the parent
17718 // context.
17719 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
17720 // All set, we're inside a for loop and we're parsing
17721 // multiple targets.
17722 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17723 // Multi targets are not allowed when it's not a
17724 // statement level.
17725 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17726 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17727 // Multi targets must be followed by an equal sign in
17728 // order to be valid (or a right parenthesis if they are
17729 // nested).
17730 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17731 }
17732
17733 return result;
17734 }
17735
17736 // If we have a single statement and are ending on a right parenthesis
17737 // and we didn't return a multiple assignment node, then we can return a
17738 // regular parentheses node now.
17739 pm_statements_node_t *statements = pm_statements_node_create(parser);
17740 pm_statements_node_body_append(parser, statements, statement, true);
17741
17742 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17743 }
17744
17745 // If we have more than one statement in the set of parentheses,
17746 // then we are going to parse all of them as a list of statements.
17747 // We'll do that here.
17748 context_push(parser, PM_CONTEXT_PARENS);
17749 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17750
17751 pm_statements_node_t *statements = pm_statements_node_create(parser);
17752 pm_statements_node_body_append(parser, statements, statement, true);
17753
17754 // If we didn't find a terminator and we didn't find a right
17755 // parenthesis, then this is a syntax error.
17756 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17757 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17758 }
17759
17760 // Parse each statement within the parentheses.
17761 while (true) {
17762 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17763 pm_statements_node_body_append(parser, statements, node, true);
17764
17765 // If we're recovering from a syntax error, then we need to stop
17766 // parsing the statements now.
17767 if (parser->recovering) {
17768 // If this is the level of context where the recovery has
17769 // happened, then we can mark the parser as done recovering.
17770 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
17771 break;
17772 }
17773
17774 // If we couldn't parse an expression at all, then we need to
17775 // bail out of the loop.
17776 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
17777
17778 // If we successfully parsed a statement, then we are going to
17779 // need a terminator to delimit them.
17780 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17781 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17782 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
17783 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17784 break;
17785 } else if (!match1(parser, PM_TOKEN_EOF)) {
17786 // If we're at the end of the file, then we're going to add
17787 // an error after this for the ) anyway.
17788 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
17789 }
17790 }
17791
17792 context_pop(parser);
17793 pm_accepts_block_stack_pop(parser);
17794 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17795
17796 // When we're parsing multi targets, we allow them to be followed by
17797 // a right parenthesis if they are at the statement level. This is
17798 // only possible if they are the final statement in a parentheses.
17799 // We need to explicitly reject that here.
17800 {
17801 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
17802
17803 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17804 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
17805 pm_multi_target_node_targets_append(parser, multi_target, statement);
17806
17807 statement = UP(multi_target);
17808 statements->body.nodes[statements->body.size - 1] = statement;
17809 }
17810
17811 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17812 const uint8_t *offset = parser->start + PM_NODE_END(statement);
17813 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17814 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0));
17815
17816 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
17817 statements->body.nodes[statements->body.size - 1] = statement;
17818
17819 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17820 }
17821 }
17822
17823 pop_block_exits(parser, previous_block_exits);
17824 pm_node_list_free(&current_block_exits);
17825
17826 pm_void_statements_check(parser, statements, true);
17827 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
17828 }
17829 case PM_TOKEN_BRACE_LEFT: {
17830 // If we were passed a current_hash_keys via the parser, then that
17831 // means we're already parsing a hash and we want to share the set
17832 // of hash keys with this inner hash we're about to parse for the
17833 // sake of warnings. We'll set it to NULL after we grab it to make
17834 // sure subsequent expressions don't use it. Effectively this is a
17835 // way of getting around passing it to every call to
17836 // parse_expression.
17837 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17838 parser->current_hash_keys = NULL;
17839
17840 pm_accepts_block_stack_push(parser, true);
17841 parser_lex(parser);
17842
17843 pm_token_t opening = parser->previous;
17844 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
17845
17846 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17847 if (current_hash_keys != NULL) {
17848 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17849 } else {
17850 pm_static_literals_t hash_keys = { 0 };
17851 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17852 pm_static_literals_free(&hash_keys);
17853 }
17854
17855 accept1(parser, PM_TOKEN_NEWLINE);
17856 }
17857
17858 pm_accepts_block_stack_pop(parser);
17859 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17860 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
17861
17862 return UP(node);
17863 }
17864 case PM_TOKEN_CHARACTER_LITERAL: {
17865 pm_node_t *node = UP(pm_string_node_create_current_string(
17866 parser,
17867 &(pm_token_t) {
17868 .type = PM_TOKEN_STRING_BEGIN,
17869 .start = parser->current.start,
17870 .end = parser->current.start + 1
17871 },
17872 &(pm_token_t) {
17873 .type = PM_TOKEN_STRING_CONTENT,
17874 .start = parser->current.start + 1,
17875 .end = parser->current.end
17876 },
17877 NULL
17878 ));
17879
17880 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17881
17882 // Skip past the character literal here, since now we have handled
17883 // parser->explicit_encoding correctly.
17884 parser_lex(parser);
17885
17886 // Characters can be followed by strings in which case they are
17887 // automatically concatenated.
17888 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17889 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
17890 }
17891
17892 return node;
17893 }
17894 case PM_TOKEN_CLASS_VARIABLE: {
17895 parser_lex(parser);
17896 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17897
17898 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17899 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17900 }
17901
17902 return node;
17903 }
17904 case PM_TOKEN_CONSTANT: {
17905 parser_lex(parser);
17906 pm_token_t constant = parser->previous;
17907
17908 // If a constant is immediately followed by parentheses, then this is in
17909 // fact a method call, not a constant read.
17910 if (
17911 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17912 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17913 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17914 match1(parser, PM_TOKEN_BRACE_LEFT)
17915 ) {
17916 pm_arguments_t arguments = { 0 };
17917 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
17918 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17919 }
17920
17921 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
17922
17923 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17924 // If we get here, then we have a comma immediately following a
17925 // constant, so we're going to parse this as a multiple assignment.
17926 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17927 }
17928
17929 return node;
17930 }
17931 case PM_TOKEN_UCOLON_COLON: {
17932 parser_lex(parser);
17933 pm_token_t delimiter = parser->previous;
17934
17935 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17936 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
17937
17938 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17939 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17940 }
17941
17942 return node;
17943 }
17944 case PM_TOKEN_UDOT_DOT:
17945 case PM_TOKEN_UDOT_DOT_DOT: {
17946 pm_token_t operator = parser->current;
17947 parser_lex(parser);
17948
17949 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17950
17951 // Unary .. and ... are special because these are non-associative
17952 // operators that can also be unary operators. In this case we need
17953 // to explicitly reject code that has a .. or ... that follows this
17954 // expression.
17955 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17956 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17957 }
17958
17959 return UP(pm_range_node_create(parser, NULL, &operator, right));
17960 }
17961 case PM_TOKEN_FLOAT:
17962 parser_lex(parser);
17963 return UP(pm_float_node_create(parser, &parser->previous));
17964 case PM_TOKEN_FLOAT_IMAGINARY:
17965 parser_lex(parser);
17966 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
17967 case PM_TOKEN_FLOAT_RATIONAL:
17968 parser_lex(parser);
17969 return UP(pm_float_node_rational_create(parser, &parser->previous));
17970 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17971 parser_lex(parser);
17972 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
17973 case PM_TOKEN_NUMBERED_REFERENCE: {
17974 parser_lex(parser);
17975 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17976
17977 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17978 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17979 }
17980
17981 return node;
17982 }
17983 case PM_TOKEN_GLOBAL_VARIABLE: {
17984 parser_lex(parser);
17985 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17986
17987 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17988 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17989 }
17990
17991 return node;
17992 }
17993 case PM_TOKEN_BACK_REFERENCE: {
17994 parser_lex(parser);
17995 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17996
17997 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17998 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17999 }
18000
18001 return node;
18002 }
18003 case PM_TOKEN_IDENTIFIER:
18004 case PM_TOKEN_METHOD_NAME: {
18005 parser_lex(parser);
18006 pm_token_t identifier = parser->previous;
18007 pm_node_t *node = parse_variable_call(parser);
18008
18009 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18010 // If parse_variable_call returned with a call node, then we
18011 // know the identifier is not in the local table. In that case
18012 // we need to check if there are arguments following the
18013 // identifier.
18014 pm_call_node_t *call = (pm_call_node_t *) node;
18015 pm_arguments_t arguments = { 0 };
18016
18017 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18018 // Since we found arguments, we need to turn off the
18019 // variable call bit in the flags.
18020 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18021
18022 call->opening_loc = arguments.opening_loc;
18023 call->arguments = arguments.arguments;
18024 call->closing_loc = arguments.closing_loc;
18025 call->block = arguments.block;
18026
18027 const pm_location_t *end = pm_arguments_end(&arguments);
18028 if (end == NULL) {
18029 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
18030 } else {
18031 PM_NODE_LENGTH_SET_LOCATION(call, end);
18032 }
18033 }
18034 } else {
18035 // Otherwise, we know the identifier is in the local table. This
18036 // can still be a method call if it is followed by arguments or
18037 // a block, so we need to check for that here.
18038 if (
18039 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18040 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18041 match1(parser, PM_TOKEN_BRACE_LEFT)
18042 ) {
18043 pm_arguments_t arguments = { 0 };
18044 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18045 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18046
18047 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
18048 // If we're about to convert an 'it' implicit local
18049 // variable read into a method call, we need to remove
18050 // it from the list of implicit local variables.
18051 pm_node_unreference(parser, node);
18052 } else {
18053 // Otherwise, we're about to convert a regular local
18054 // variable read into a method call, in which case we
18055 // need to indicate that this was not a read for the
18056 // purposes of warnings.
18057 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
18058
18059 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
18060 pm_node_unreference(parser, node);
18061 } else {
18063 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18064 }
18065 }
18066
18067 pm_node_destroy(parser, node);
18068 return UP(fcall);
18069 }
18070 }
18071
18072 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18073 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18074 }
18075
18076 return node;
18077 }
18078 case PM_TOKEN_HEREDOC_START: {
18079 // Here we have found a heredoc. We'll parse it and add it to the
18080 // list of strings.
18081 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18082 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18083
18084 size_t common_whitespace = (size_t) -1;
18085 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18086
18087 parser_lex(parser);
18088 pm_token_t opening = parser->previous;
18089
18090 pm_node_t *node;
18091 pm_node_t *part;
18092
18093 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18094 // If we get here, then we have an empty heredoc. We'll create
18095 // an empty content token and return an empty string node.
18096 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18097 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18098
18099 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18100 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18101 } else {
18102 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
18103 }
18104
18105 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
18106 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18107 // If we get here, then we tried to find something in the
18108 // heredoc but couldn't actually parse anything, so we'll just
18109 // return a missing node.
18110 //
18111 // parse_string_part handles its own errors, so there is no need
18112 // for us to add one here.
18113 node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
18114 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18115 // If we get here, then the part that we parsed was plain string
18116 // content and we're at the end of the heredoc, so we can return
18117 // just a string node with the heredoc opening and closing as
18118 // its opening and closing.
18119 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18120 pm_string_node_t *cast = (pm_string_node_t *) part;
18121
18122 cast->opening_loc = TOK2LOC(parser, &opening);
18123 cast->closing_loc = TOK2LOC(parser, &parser->current);
18124 cast->base.location = cast->opening_loc;
18125
18126 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18127 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18128 cast->base.type = PM_X_STRING_NODE;
18129 }
18130
18131 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18132 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18133 }
18134
18135 node = UP(cast);
18136 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18137 } else {
18138 // If we get here, then we have multiple parts in the heredoc,
18139 // so we'll need to create an interpolated string node to hold
18140 // them all.
18141 pm_node_list_t parts = { 0 };
18142 pm_node_list_append(&parts, part);
18143
18144 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18145 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18146 pm_node_list_append(&parts, part);
18147 }
18148 }
18149
18150 // Now that we have all of the parts, create the correct type of
18151 // interpolated node.
18152 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18153 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18154 cast->parts = parts;
18155
18156 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18157 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
18158
18159 cast->base.location = cast->opening_loc;
18160 node = UP(cast);
18161 } else {
18162 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18163 pm_node_list_free(&parts);
18164
18165 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18166 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
18167
18168 cast->base.location = cast->opening_loc;
18169 node = UP(cast);
18170 }
18171
18172 // If this is a heredoc that is indented with a ~, then we need
18173 // to dedent each line by the common leading whitespace.
18174 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18175 pm_node_list_t *nodes;
18176 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18177 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18178 } else {
18179 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18180 }
18181
18182 parse_heredoc_dedent(parser, nodes, common_whitespace);
18183 }
18184 }
18185
18186 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18187 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18188 }
18189
18190 return node;
18191 }
18192 case PM_TOKEN_INSTANCE_VARIABLE: {
18193 parser_lex(parser);
18194 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
18195
18196 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18197 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18198 }
18199
18200 return node;
18201 }
18202 case PM_TOKEN_INTEGER: {
18203 pm_node_flags_t base = parser->integer_base;
18204 parser_lex(parser);
18205 return UP(pm_integer_node_create(parser, base, &parser->previous));
18206 }
18207 case PM_TOKEN_INTEGER_IMAGINARY: {
18208 pm_node_flags_t base = parser->integer_base;
18209 parser_lex(parser);
18210 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
18211 }
18212 case PM_TOKEN_INTEGER_RATIONAL: {
18213 pm_node_flags_t base = parser->integer_base;
18214 parser_lex(parser);
18215 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
18216 }
18217 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18218 pm_node_flags_t base = parser->integer_base;
18219 parser_lex(parser);
18220 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
18221 }
18222 case PM_TOKEN_KEYWORD___ENCODING__:
18223 parser_lex(parser);
18224 return UP(pm_source_encoding_node_create(parser, &parser->previous));
18225 case PM_TOKEN_KEYWORD___FILE__:
18226 parser_lex(parser);
18227 return UP(pm_source_file_node_create(parser, &parser->previous));
18228 case PM_TOKEN_KEYWORD___LINE__:
18229 parser_lex(parser);
18230 return UP(pm_source_line_node_create(parser, &parser->previous));
18231 case PM_TOKEN_KEYWORD_ALIAS: {
18232 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18233 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18234 }
18235
18236 parser_lex(parser);
18237 pm_token_t keyword = parser->previous;
18238
18239 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18240 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18241
18242 switch (PM_NODE_TYPE(new_name)) {
18243 case PM_BACK_REFERENCE_READ_NODE:
18244 case PM_NUMBERED_REFERENCE_READ_NODE:
18245 case PM_GLOBAL_VARIABLE_READ_NODE: {
18246 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18247 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18248 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18249 }
18250 } else {
18251 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18252 }
18253
18254 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18255 }
18256 case PM_SYMBOL_NODE:
18257 case PM_INTERPOLATED_SYMBOL_NODE: {
18258 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18259 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18260 }
18261 }
18263 default:
18264 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18265 }
18266 }
18267 case PM_TOKEN_KEYWORD_CASE: {
18268 size_t opening_newline_index = token_newline_index(parser);
18269 parser_lex(parser);
18270
18271 pm_token_t case_keyword = parser->previous;
18272 pm_node_t *predicate = NULL;
18273
18274 pm_node_list_t current_block_exits = { 0 };
18275 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18276
18277 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18278 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18279 predicate = NULL;
18280 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18281 predicate = NULL;
18282 } else if (!token_begins_expression_p(parser->current.type)) {
18283 predicate = NULL;
18284 } else {
18285 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18286 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18287 }
18288
18289 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18290 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18291 parser_lex(parser);
18292
18293 pop_block_exits(parser, previous_block_exits);
18294 pm_node_list_free(&current_block_exits);
18295
18296 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18297 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
18298 }
18299
18300 // At this point we can create a case node, though we don't yet know
18301 // if it is a case-in or case-when node.
18302 pm_node_t *node;
18303
18304 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18305 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
18306 pm_static_literals_t literals = { 0 };
18307
18308 // At this point we've seen a when keyword, so we know this is a
18309 // case-when node. We will continue to parse the when nodes
18310 // until we hit the end of the list.
18311 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18312 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18313 parser_lex(parser);
18314
18315 pm_token_t when_keyword = parser->previous;
18316 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18317
18318 do {
18319 if (accept1(parser, PM_TOKEN_USTAR)) {
18320 pm_token_t operator = parser->previous;
18321 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18322
18323 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18324 pm_when_node_conditions_append(when_node, UP(splat_node));
18325
18326 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18327 } else {
18328 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18329 pm_when_node_conditions_append(when_node, condition);
18330
18331 // If we found a missing node, then this is a syntax
18332 // error and we should stop looping.
18333 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18334
18335 // If this is a string node, then we need to mark it
18336 // as frozen because when clause strings are frozen.
18337 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18338 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18339 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18340 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18341 }
18342
18343 pm_when_clause_static_literals_add(parser, &literals, condition);
18344 }
18345 } while (accept1(parser, PM_TOKEN_COMMA));
18346
18347 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18348 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18349 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18350 }
18351 } else {
18352 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18353 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
18354 }
18355
18356 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18357 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18358 if (statements != NULL) {
18359 pm_when_node_statements_set(when_node, statements);
18360 }
18361 }
18362
18363 pm_case_node_condition_append(case_node, UP(when_node));
18364 }
18365
18366 // If we didn't parse any conditions (in or when) then we need
18367 // to indicate that we have an error.
18368 if (case_node->conditions.size == 0) {
18369 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18370 }
18371
18372 pm_static_literals_free(&literals);
18373 node = UP(case_node);
18374 } else {
18375 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
18376
18377 // If this is a case-match node (i.e., it is a pattern matching
18378 // case statement) then we must have a predicate.
18379 if (predicate == NULL) {
18380 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18381 }
18382
18383 // At this point we expect that we're parsing a case-in node. We
18384 // will continue to parse the in nodes until we hit the end of
18385 // the list.
18386 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18387 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18388
18389 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18390 parser->pattern_matching_newlines = true;
18391
18392 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18393 parser->command_start = false;
18394 parser_lex(parser);
18395
18396 pm_token_t in_keyword = parser->previous;
18397
18398 pm_constant_id_list_t captures = { 0 };
18399 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18400
18401 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18402 pm_constant_id_list_free(&captures);
18403
18404 // Since we're in the top-level of the case-in node we need
18405 // to check for guard clauses in the form of `if` or
18406 // `unless` statements.
18407 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18408 pm_token_t keyword = parser->previous;
18409 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18410 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18411 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18412 pm_token_t keyword = parser->previous;
18413 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18414 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18415 }
18416
18417 // Now we need to check for the terminator of the in node's
18418 // pattern. It can be a newline or semicolon optionally
18419 // followed by a `then` keyword.
18420 pm_token_t then_keyword = { 0 };
18421 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18422 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18423 then_keyword = parser->previous;
18424 }
18425 } else {
18426 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18427 then_keyword = parser->previous;
18428 }
18429
18430 // Now we can actually parse the statements associated with
18431 // the in node.
18432 pm_statements_node_t *statements;
18433 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18434 statements = NULL;
18435 } else {
18436 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18437 }
18438
18439 // Now that we have the full pattern and statements, we can
18440 // create the node and attach it to the case node.
18441 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
18442 pm_case_match_node_condition_append(case_node, condition);
18443 }
18444
18445 // If we didn't parse any conditions (in or when) then we need
18446 // to indicate that we have an error.
18447 if (case_node->conditions.size == 0) {
18448 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18449 }
18450
18451 node = UP(case_node);
18452 }
18453
18454 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18455 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18456 pm_token_t else_keyword = parser->previous;
18457 pm_else_node_t *else_node;
18458
18459 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18460 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18461 } else {
18462 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18463 }
18464
18465 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18466 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18467 } else {
18468 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18469 }
18470 }
18471
18472 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18473 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18474
18475 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18476 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
18477 } else {
18478 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
18479 }
18480
18481 pop_block_exits(parser, previous_block_exits);
18482 pm_node_list_free(&current_block_exits);
18483
18484 return node;
18485 }
18486 case PM_TOKEN_KEYWORD_BEGIN: {
18487 size_t opening_newline_index = token_newline_index(parser);
18488 parser_lex(parser);
18489
18490 pm_token_t begin_keyword = parser->previous;
18491 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18492
18493 pm_node_list_t current_block_exits = { 0 };
18494 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18495 pm_statements_node_t *begin_statements = NULL;
18496
18497 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18498 pm_accepts_block_stack_push(parser, true);
18499 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18500 pm_accepts_block_stack_pop(parser);
18501 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18502 }
18503
18504 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18505 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18506 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18507
18508 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
18509 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
18510
18511 pop_block_exits(parser, previous_block_exits);
18512 pm_node_list_free(&current_block_exits);
18513
18514 return UP(begin_node);
18515 }
18516 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18517 pm_node_list_t current_block_exits = { 0 };
18518 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18519
18520 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18521 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18522 }
18523
18524 parser_lex(parser);
18525 pm_token_t keyword = parser->previous;
18526
18527 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18528 pm_token_t opening = parser->previous;
18529 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18530
18531 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
18532 pm_context_t context = parser->current_context->context;
18533 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18534 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18535 }
18536
18537 flush_block_exits(parser, previous_block_exits);
18538 pm_node_list_free(&current_block_exits);
18539
18540 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
18541 }
18542 case PM_TOKEN_KEYWORD_BREAK:
18543 case PM_TOKEN_KEYWORD_NEXT:
18544 case PM_TOKEN_KEYWORD_RETURN: {
18545 parser_lex(parser);
18546
18547 pm_token_t keyword = parser->previous;
18548 pm_arguments_t arguments = { 0 };
18549
18550 if (
18551 token_begins_expression_p(parser->current.type) ||
18552 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18553 ) {
18554 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
18555
18556 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18557 pm_token_t next = parser->current;
18558 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18559
18560 // Reject `foo && return bar`.
18561 if (!accepts_command_call && arguments.arguments != NULL) {
18562 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
18563 }
18564 }
18565 }
18566
18567 switch (keyword.type) {
18568 case PM_TOKEN_KEYWORD_BREAK: {
18569 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
18570 if (!parser->partial_script) parse_block_exit(parser, node);
18571 return node;
18572 }
18573 case PM_TOKEN_KEYWORD_NEXT: {
18574 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
18575 if (!parser->partial_script) parse_block_exit(parser, node);
18576 return node;
18577 }
18578 case PM_TOKEN_KEYWORD_RETURN: {
18579 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
18580 parse_return(parser, node);
18581 return node;
18582 }
18583 default:
18584 assert(false && "unreachable");
18585 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
18586 }
18587 }
18588 case PM_TOKEN_KEYWORD_SUPER: {
18589 parser_lex(parser);
18590
18591 pm_token_t keyword = parser->previous;
18592 pm_arguments_t arguments = { 0 };
18593 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18594
18595 if (
18596 arguments.opening_loc.length == 0 &&
18597 arguments.arguments == NULL &&
18598 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
18599 ) {
18600 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18601 }
18602
18603 return UP(pm_super_node_create(parser, &keyword, &arguments));
18604 }
18605 case PM_TOKEN_KEYWORD_YIELD: {
18606 parser_lex(parser);
18607
18608 pm_token_t keyword = parser->previous;
18609 pm_arguments_t arguments = { 0 };
18610 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
18611
18612 // It's possible that we've parsed a block argument through our
18613 // call to parse_arguments_list. If we found one, we should mark it
18614 // as invalid and destroy it, as we don't have a place for it on the
18615 // yield node.
18616 if (arguments.block != NULL) {
18617 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18618 pm_node_unreference(parser, arguments.block);
18619 pm_node_destroy(parser, arguments.block);
18620 arguments.block = NULL;
18621 }
18622
18623 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
18624 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
18625
18626 return node;
18627 }
18628 case PM_TOKEN_KEYWORD_CLASS: {
18629 size_t opening_newline_index = token_newline_index(parser);
18630 parser_lex(parser);
18631
18632 pm_token_t class_keyword = parser->previous;
18633 pm_do_loop_stack_push(parser, false);
18634
18635 pm_node_list_t current_block_exits = { 0 };
18636 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18637
18638 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18639 pm_token_t operator = parser->previous;
18640 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18641
18642 pm_parser_scope_push(parser, true);
18643 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18644 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
18645 }
18646
18647 pm_node_t *statements = NULL;
18648 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18649 pm_accepts_block_stack_push(parser, true);
18650 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18651 pm_accepts_block_stack_pop(parser);
18652 }
18653
18654 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18655 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18656 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18657 } else {
18658 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18659 }
18660
18661 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18662
18663 pm_constant_id_list_t locals;
18664 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18665
18666 pm_parser_scope_pop(parser);
18667 pm_do_loop_stack_pop(parser);
18668
18669 flush_block_exits(parser, previous_block_exits);
18670 pm_node_list_free(&current_block_exits);
18671
18672 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18673 }
18674
18675 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18676 pm_token_t name = parser->previous;
18677 if (name.type != PM_TOKEN_CONSTANT) {
18678 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18679 }
18680
18681 pm_token_t inheritance_operator = { 0 };
18682 pm_node_t *superclass;
18683
18684 if (match1(parser, PM_TOKEN_LESS)) {
18685 inheritance_operator = parser->current;
18686 lex_state_set(parser, PM_LEX_STATE_BEG);
18687
18688 parser->command_start = true;
18689 parser_lex(parser);
18690
18691 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18692 } else {
18693 superclass = NULL;
18694 }
18695
18696 pm_parser_scope_push(parser, true);
18697
18698 if (inheritance_operator.start != NULL) {
18699 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18700 } else {
18701 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18702 }
18703 pm_node_t *statements = NULL;
18704
18705 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18706 pm_accepts_block_stack_push(parser, true);
18707 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18708 pm_accepts_block_stack_pop(parser);
18709 }
18710
18711 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18712 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18713 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18714 } else {
18715 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18716 }
18717
18718 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18719
18720 if (context_def_p(parser)) {
18721 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18722 }
18723
18724 pm_constant_id_list_t locals;
18725 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18726
18727 pm_parser_scope_pop(parser);
18728 pm_do_loop_stack_pop(parser);
18729
18730 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18731 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18732 }
18733
18734 pop_block_exits(parser, previous_block_exits);
18735 pm_node_list_free(&current_block_exits);
18736
18737 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18738 }
18739 case PM_TOKEN_KEYWORD_DEF: {
18740 pm_node_list_t current_block_exits = { 0 };
18741 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18742
18743 pm_token_t def_keyword = parser->current;
18744 size_t opening_newline_index = token_newline_index(parser);
18745
18746 pm_node_t *receiver = NULL;
18747 pm_token_t operator = { 0 };
18748 pm_token_t name;
18749
18750 // This context is necessary for lexing `...` correctly in bare
18751 // params. It must be pushed before lexing the first param, so it
18752 // is pushed here.
18753 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18754 parser_lex(parser);
18755
18756 // This will be false if the method name is not a valid identifier
18757 // but could be followed by an operator.
18758 bool valid_name = true;
18759
18760 switch (parser->current.type) {
18761 case PM_CASE_OPERATOR:
18762 pm_parser_scope_push(parser, true);
18763 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18764 parser_lex(parser);
18765
18766 name = parser->previous;
18767 break;
18768 case PM_TOKEN_IDENTIFIER: {
18769 parser_lex(parser);
18770
18771 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18772 receiver = parse_variable_call(parser);
18773
18774 pm_parser_scope_push(parser, true);
18775 lex_state_set(parser, PM_LEX_STATE_FNAME);
18776 parser_lex(parser);
18777
18778 operator = parser->previous;
18779 name = parse_method_definition_name(parser);
18780 } else {
18781 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18782 pm_parser_scope_push(parser, true);
18783
18784 name = parser->previous;
18785 }
18786
18787 break;
18788 }
18789 case PM_TOKEN_INSTANCE_VARIABLE:
18790 case PM_TOKEN_CLASS_VARIABLE:
18791 case PM_TOKEN_GLOBAL_VARIABLE:
18792 valid_name = false;
18794 case PM_TOKEN_CONSTANT:
18795 case PM_TOKEN_KEYWORD_NIL:
18796 case PM_TOKEN_KEYWORD_SELF:
18797 case PM_TOKEN_KEYWORD_TRUE:
18798 case PM_TOKEN_KEYWORD_FALSE:
18799 case PM_TOKEN_KEYWORD___FILE__:
18800 case PM_TOKEN_KEYWORD___LINE__:
18801 case PM_TOKEN_KEYWORD___ENCODING__: {
18802 pm_parser_scope_push(parser, true);
18803 parser_lex(parser);
18804
18805 pm_token_t identifier = parser->previous;
18806
18807 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18808 lex_state_set(parser, PM_LEX_STATE_FNAME);
18809 parser_lex(parser);
18810 operator = parser->previous;
18811
18812 switch (identifier.type) {
18813 case PM_TOKEN_CONSTANT:
18814 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18815 break;
18816 case PM_TOKEN_INSTANCE_VARIABLE:
18817 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18818 break;
18819 case PM_TOKEN_CLASS_VARIABLE:
18820 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18821 break;
18822 case PM_TOKEN_GLOBAL_VARIABLE:
18823 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18824 break;
18825 case PM_TOKEN_KEYWORD_NIL:
18826 receiver = UP(pm_nil_node_create(parser, &identifier));
18827 break;
18828 case PM_TOKEN_KEYWORD_SELF:
18829 receiver = UP(pm_self_node_create(parser, &identifier));
18830 break;
18831 case PM_TOKEN_KEYWORD_TRUE:
18832 receiver = UP(pm_true_node_create(parser, &identifier));
18833 break;
18834 case PM_TOKEN_KEYWORD_FALSE:
18835 receiver = UP(pm_false_node_create(parser, &identifier));
18836 break;
18837 case PM_TOKEN_KEYWORD___FILE__:
18838 receiver = UP(pm_source_file_node_create(parser, &identifier));
18839 break;
18840 case PM_TOKEN_KEYWORD___LINE__:
18841 receiver = UP(pm_source_line_node_create(parser, &identifier));
18842 break;
18843 case PM_TOKEN_KEYWORD___ENCODING__:
18844 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18845 break;
18846 default:
18847 break;
18848 }
18849
18850 name = parse_method_definition_name(parser);
18851 } else {
18852 if (!valid_name) {
18853 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
18854 }
18855
18856 name = identifier;
18857 }
18858 break;
18859 }
18860 case PM_TOKEN_PARENTHESIS_LEFT: {
18861 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
18862 // the inner expression of this parenthesis should not be
18863 // processed under this context. Thus, the context is popped
18864 // here.
18865 context_pop(parser);
18866 parser_lex(parser);
18867
18868 pm_token_t lparen = parser->previous;
18869 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18870
18871 accept1(parser, PM_TOKEN_NEWLINE);
18872 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18873 pm_token_t rparen = parser->previous;
18874
18875 lex_state_set(parser, PM_LEX_STATE_FNAME);
18876 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18877
18878 operator = parser->previous;
18879 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18880
18881 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
18882 // reason as described above.
18883 pm_parser_scope_push(parser, true);
18884 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18885 name = parse_method_definition_name(parser);
18886 break;
18887 }
18888 default:
18889 pm_parser_scope_push(parser, true);
18890 name = parse_method_definition_name(parser);
18891 break;
18892 }
18893
18894 pm_token_t lparen = { 0 };
18895 pm_token_t rparen = { 0 };
18896 pm_parameters_node_t *params;
18897
18898 bool accept_endless_def = true;
18899 switch (parser->current.type) {
18900 case PM_TOKEN_PARENTHESIS_LEFT: {
18901 parser_lex(parser);
18902 lparen = parser->previous;
18903
18904 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18905 params = NULL;
18906 } else {
18907 // https://bugs.ruby-lang.org/issues/19107
18908 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18909 params = parse_parameters(
18910 parser,
18911 PM_BINDING_POWER_DEFINED,
18912 true,
18913 allow_trailing_comma,
18914 true,
18915 true,
18916 false,
18917 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18918 (uint16_t) (depth + 1)
18919 );
18920 }
18921
18922 lex_state_set(parser, PM_LEX_STATE_BEG);
18923 parser->command_start = true;
18924
18925 context_pop(parser);
18926 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18927 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18928 parser->previous.start = parser->previous.end;
18929 parser->previous.type = 0;
18930 }
18931
18932 rparen = parser->previous;
18933 break;
18934 }
18935 case PM_CASE_PARAMETER: {
18936 // If we're about to lex a label, we need to add the label
18937 // state to make sure the next newline is ignored.
18938 if (parser->current.type == PM_TOKEN_LABEL) {
18939 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18940 }
18941
18942 params = parse_parameters(
18943 parser,
18944 PM_BINDING_POWER_DEFINED,
18945 false,
18946 false,
18947 true,
18948 true,
18949 false,
18950 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18951 (uint16_t) (depth + 1)
18952 );
18953
18954 // Reject `def * = 1` and similar. We have to specifically check
18955 // for them because they create ambiguity with optional arguments.
18956 accept_endless_def = false;
18957
18958 context_pop(parser);
18959 break;
18960 }
18961 default: {
18962 params = NULL;
18963 context_pop(parser);
18964 break;
18965 }
18966 }
18967
18968 pm_node_t *statements = NULL;
18969 pm_token_t equal = { 0 };
18970 pm_token_t end_keyword = { 0 };
18971
18972 if (accept1(parser, PM_TOKEN_EQUAL)) {
18973 if (token_is_setter_name(&name)) {
18974 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18975 }
18976 if (!accept_endless_def) {
18977 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18978 }
18979 if (
18982 ) {
18983 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18984 }
18985 equal = parser->previous;
18986
18987 context_push(parser, PM_CONTEXT_DEF);
18988 pm_do_loop_stack_push(parser, false);
18989 statements = UP(pm_statements_node_create(parser));
18990
18991 bool allow_command_call;
18992 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18993 allow_command_call = accepts_command_call;
18994 } else {
18995 // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
18996 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18997 }
18998
18999 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19000
19001 // In an endless method definition, the body is not allowed to
19002 // be a command with a do..end block.
19003 if (PM_NODE_TYPE_P(statement, PM_CALL_NODE)) {
19004 pm_call_node_t *call = (pm_call_node_t *) statement;
19005
19006 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
19007 pm_block_node_t *block = (pm_block_node_t *) call->block;
19008
19009 if (parser->start[block->opening_loc.start] != '{') {
19010 pm_parser_err_node(parser, call->block, PM_ERR_DEF_ENDLESS_DO_BLOCK);
19011 }
19012 }
19013 }
19014
19015 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19016 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19017
19018 pm_token_t rescue_keyword = parser->previous;
19019 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19020 context_pop(parser);
19021
19022 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
19023 }
19024
19025 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19026 pm_do_loop_stack_pop(parser);
19027 context_pop(parser);
19028 } else {
19029 if (lparen.start == NULL) {
19030 lex_state_set(parser, PM_LEX_STATE_BEG);
19031 parser->command_start = true;
19032 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19033 } else {
19034 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19035 }
19036
19037 pm_accepts_block_stack_push(parser, true);
19038 pm_do_loop_stack_push(parser, false);
19039
19040 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19041 pm_accepts_block_stack_push(parser, true);
19042 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
19043 pm_accepts_block_stack_pop(parser);
19044 }
19045
19046 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19047 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19048 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
19049 } else {
19050 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19051 }
19052
19053 pm_accepts_block_stack_pop(parser);
19054 pm_do_loop_stack_pop(parser);
19055
19056 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
19057 end_keyword = parser->previous;
19058 }
19059
19060 pm_constant_id_list_t locals;
19061 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19062 pm_parser_scope_pop(parser);
19063
19069 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
19070
19071 flush_block_exits(parser, previous_block_exits);
19072 pm_node_list_free(&current_block_exits);
19073
19074 return UP(pm_def_node_create(
19075 parser,
19076 name_id,
19077 &name,
19078 receiver,
19079 params,
19080 statements,
19081 &locals,
19082 &def_keyword,
19083 NTOK2PTR(operator),
19084 NTOK2PTR(lparen),
19085 NTOK2PTR(rparen),
19086 NTOK2PTR(equal),
19087 NTOK2PTR(end_keyword)
19088 ));
19089 }
// Prefix parse for PM_TOKEN_KEYWORD_DEFINED (Ruby's `defined?`). Consumes the
// keyword, parses its operand (with or without parentheses) inside a
// PM_CONTEXT_DEFINED context, and returns the node built by
// pm_defined_node_create.
19090 case PM_TOKEN_KEYWORD_DEFINED: {
19091 parser_lex(parser);
19092
19093 pm_token_t keyword = parser->previous;
// Both parenthesis tokens start zeroed and are only filled in when the
// corresponding token is actually consumed below.
19094 pm_token_t lparen = { 0 };
19095 pm_token_t rparen = { 0 };
19096 pm_node_t *expression;
19097
19098 context_push(parser, PM_CONTEXT_DEFINED);
// An optional newline directly after the keyword is consumed here; whether
// one was seen matters for the empty `()` special case below.
19099 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19100
19101 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19102 lparen = parser->previous;
19103
// Keyword, newline, then an immediately closed `()`: the empty parentheses
// become the operand itself (a parentheses node with a NULL body) and
// lparen is zeroed again, so neither paren is recorded on the defined node.
19104 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19105 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19106 lparen = (pm_token_t) { 0 };
19107 } else {
19108 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19109
// The closing parenthesis is only looked for when the parser is not
// recovering; otherwise rparen stays zeroed.
19110 if (!parser->recovering) {
19111 accept1(parser, PM_TOKEN_NEWLINE);
19112 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19113 rparen = parser->previous;
19114 }
19115 }
19116 } else {
// No parentheses: parse the operand at PM_BINDING_POWER_DEFINED with command
// calls disallowed (third argument is false).
19117 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19118 }
19119
19120 context_pop(parser);
// NOTE(review): NTOK2PTR presumably maps a zeroed token to NULL so that absent
// parentheses are passed as "not provided" -- confirm against its definition.
19121 return UP(pm_defined_node_create(
19122 parser,
19123 NTOK2PTR(lparen),
19124 expression,
19125 NTOK2PTR(rparen),
19126 &keyword
19127 ));
19128 }
// Prefix parse for PM_TOKEN_KEYWORD_END_UPCASE (presumably Ruby's
// `END { ... }`). Consumes the keyword and a brace-delimited statement list
// and returns the node built by pm_post_execution_node_create.
19129 case PM_TOKEN_KEYWORD_END_UPCASE: {
// Anywhere other than statement binding power this is reported as an error
// on the current token, but parsing continues so a node is still produced.
19130 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19131 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19132 }
19133
19134 parser_lex(parser);
19135 pm_token_t keyword = parser->previous;
19136
// A warning (not an error) is attached to the keyword when context_def_p
// reports that we are inside a method definition context.
19137 if (context_def_p(parser)) {
19138 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19139 }
19140
19141 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19142 pm_token_t opening = parser->previous;
19143 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19144
// The opening brace is passed along with the expectation for the closing
// brace; parser->previous is then used as the closing token of the node.
19145 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19146 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19147 }
// Prefix parse for PM_TOKEN_KEYWORD_FALSE: consume the keyword and build a
// false node from the token that was just consumed (parser->previous).
19148 case PM_TOKEN_KEYWORD_FALSE:
19149 parser_lex(parser);
19150 return UP(pm_false_node_create(parser, &parser->previous));
// Prefix parse for PM_TOKEN_KEYWORD_FOR. Parses the index target(s), the
// PM_TOKEN_KEYWORD_IN keyword, the collection expression, an optional
// PM_TOKEN_KEYWORD_DO_LOOP keyword, the body statements and the closing
// PM_TOKEN_KEYWORD_END, then returns the node built by pm_for_node_create.
19151 case PM_TOKEN_KEYWORD_FOR: {
// The newline index is captured before the keyword is consumed; it is used
// later for the indentation-mismatch warning.
19152 size_t opening_newline_index = token_newline_index(parser);
19153 parser_lex(parser);
19154
19155 pm_token_t for_keyword = parser->previous;
19156 pm_node_t *index;
19157
19158 context_push(parser, PM_CONTEXT_FOR_INDEX);
19159
19160 // First, parse out the first index expression.
19161 if (accept1(parser, PM_TOKEN_USTAR)) {
19162 pm_token_t star_operator = parser->previous;
// The splat may be anonymous: name stays NULL when no expression follows.
19163 pm_node_t *name = NULL;
19164
19165 if (token_begins_expression_p(parser->current.type)) {
19166 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19167 }
19168
19169 index = UP(pm_splat_node_create(parser, &star_operator, name));
19170 } else if (token_begins_expression_p(parser->current.type)) {
19171 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19172 } else {
// No index at all: report the error on the keyword and substitute a missing
// node that spans the keyword so parsing can continue.
19173 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19174 index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19175 }
19176
19177 // Now, if there are multiple index expressions, parse them out.
19178 if (match1(parser, PM_TOKEN_COMMA)) {
19179 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19180 } else {
19181 index = parse_target(parser, index, false, false);
19182 }
19183
19184 context_pop(parser);
// The do-loop stack is pushed with true around the `in` keyword and the
// collection expression and popped right after.
// NOTE(review): presumably this makes a following `do` lex as
// PM_TOKEN_KEYWORD_DO_LOOP rather than a block opener -- confirm in the lexer.
19185 pm_do_loop_stack_push(parser, true);
19186
19187 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19188 pm_token_t in_keyword = parser->previous;
19189
19190 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19191 pm_do_loop_stack_pop(parser);
19192
// do_keyword stays zeroed when no PM_TOKEN_KEYWORD_DO_LOOP is present. In that
// case the next token must be a semicolon or newline; it is only checked
// (match2), not consumed, and a formatted error is reported otherwise.
19193 pm_token_t do_keyword = { 0 };
19194 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19195 do_keyword = parser->previous;
19196 } else {
19197 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19198 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19199 }
19200 }
19201
// An empty body leaves statements as NULL.
19202 pm_statements_node_t *statements = NULL;
19203 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19204 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19205 }
19206
19207 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19208 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19209
// parser->previous is passed as the closing (end keyword) token.
19210 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19211 }
// Prefix parse for PM_TOKEN_KEYWORD_IF. Everything after the keyword is
// delegated to parse_conditional with PM_CONTEXT_IF.
19212 case PM_TOKEN_KEYWORD_IF:
// Warn (PM_WARN_KEYWORD_EOL) on the keyword token when parser_end_of_line_p
// reports that the parser is at the end of a line.
19213 if (parser_end_of_line_p(parser)) {
19214 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19215 }
19216
// Both values must be captured before parser_lex advances the token stream:
// the newline index of the keyword, and whether the token immediately before
// it was PM_TOKEN_KEYWORD_ELSE.
19217 size_t opening_newline_index = token_newline_index(parser);
19218 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19219 parser_lex(parser);
19220
19221 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
// Prefix parse for PM_TOKEN_KEYWORD_UNDEF. Builds an undef node and appends
// each comma-separated argument returned by parse_undef_argument.
19222 case PM_TOKEN_KEYWORD_UNDEF: {
// Outside statement binding power an error is reported on the current token,
// but parsing continues.
19223 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19224 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19225 }
19226
19227 parser_lex(parser);
19228 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19229 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19230
// A missing node as the first argument is destroyed instead of appended,
// leaving the undef node with no names.
19231 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19232 pm_node_destroy(parser, name);
19233 } else {
19234 pm_undef_node_append(undef, name);
19235
19236 while (match1(parser, PM_TOKEN_COMMA)) {
// The lex state is set before the comma is consumed, so the token following
// the comma is lexed under PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM.
19237 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19238 parser_lex(parser);
19239 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19240
// Stop at the first missing argument; it is destroyed rather than appended.
19241 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19242 pm_node_destroy(parser, name);
19243 break;
19244 }
19245
19246 pm_undef_node_append(undef, name);
19247 }
19248 }
19249
19250 return UP(undef);
19251 }
// Prefix parse for PM_TOKEN_KEYWORD_NOT. The result is a call node built by
// pm_call_node_not_create, with the operand as receiver and the keyword
// token as the message.
19252 case PM_TOKEN_KEYWORD_NOT: {
19253 parser_lex(parser);
19254
19255 pm_token_t message = parser->previous;
// arguments is only used to carry the opening/closing parenthesis locations
// in this arm; no argument nodes are added to it here.
19256 pm_arguments_t arguments = { 0 };
19257 pm_node_t *receiver = NULL;
19258
19259 // If we do not accept a command call, then we also do not accept a
19260 // not without parentheses. In this case we need to reject this
19261 // syntax.
19262 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
// Two distinct diagnostics: one when the next token is
// PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES (reported as a 1-byte span starting
// at the end of the keyword), another for any other token (reported on the
// current token after skipping an optional newline).
19263 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19264 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19265 } else {
19266 accept1(parser, PM_TOKEN_NEWLINE);
19267 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19268 }
19269
// Either way the result is a missing node spanning the current token.
19270 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19271 }
19272
19273 accept1(parser, PM_TOKEN_NEWLINE);
19274
19275 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19276 pm_token_t lparen = parser->previous;
19277
// An immediately closed `()` becomes the receiver itself (a parentheses node
// with a NULL body); no parenthesis locations are stored on arguments.
19278 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19279 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19280 } else {
19281 arguments.opening_loc = TOK2LOC(parser, &lparen);
19282 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19283
// The closing parenthesis is only expected and recorded when the parser is
// not recovering.
19284 if (!parser->recovering) {
19285 accept1(parser, PM_TOKEN_NEWLINE);
19286 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19287 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19288 }
19289 }
19290 } else {
// No parentheses: the operand is parsed at PM_BINDING_POWER_NOT.
19291 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19292 }
19293
19294 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19295 }
// Prefix parse for PM_TOKEN_KEYWORD_UNLESS. Records the keyword's newline
// index before consuming it, then delegates to parse_conditional with
// PM_CONTEXT_UNLESS (the if_after_else argument is always false here).
19296 case PM_TOKEN_KEYWORD_UNLESS: {
19297 size_t opening_newline_index = token_newline_index(parser);
19298 parser_lex(parser);
19299
19300 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19301 }
// Prefix parse for PM_TOKEN_KEYWORD_MODULE. Parses the constant path naming
// the module, the body (with optional rescue/ensure/else clauses) in a new
// scope, and the closing PM_TOKEN_KEYWORD_END, then returns the node built by
// pm_module_node_create.
19302 case PM_TOKEN_KEYWORD_MODULE: {
// A fresh block-exit list is installed for the duration of this arm; the
// previous list is restored with pop_block_exits and the local list freed on
// both return paths below.
19303 pm_node_list_t current_block_exits = { 0 };
19304 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19305
19306 size_t opening_newline_index = token_newline_index(parser);
19307 parser_lex(parser);
19308 pm_token_t module_keyword = parser->previous;
19309
19310 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19311 pm_token_t name;
19312
19313 // If we can recover from a syntax error that occurred while parsing
19314 // the name of the module, then we'll handle that here.
19315 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19316 pop_block_exits(parser, previous_block_exits);
19317 pm_node_list_free(&current_block_exits);
19318
// Early return: a module node with NULL locals and NULL statements. The
// zero-width `missing` token (positioned at the end of the previous token)
// stands in for both the name and the end keyword.
19319 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19320 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19321 }
19322
// Each `::` followed by a constant wraps the path parsed so far in a new
// constant path node.
19323 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19324 pm_token_t double_colon = parser->previous;
19325
19326 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19327 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
19328 }
19329
19330 // Here we retrieve the name of the module. If it wasn't a constant,
19331 // then it's possible that `module foo` was passed, which is a
19332 // syntax error. We handle that here as well.
19333 name = parser->previous;
19334 if (name.type != PM_TOKEN_CONSTANT) {
19335 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19336 }
19337
// The body is parsed in its own scope; an optional semicolon or newline after
// the name is skipped first.
19338 pm_parser_scope_push(parser, true);
19339 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19340 pm_node_t *statements = NULL;
19341
// Statements are parsed only when the body does not start directly with
// rescue/ensure/else/end; otherwise statements stays NULL.
19342 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19343 pm_accepts_block_stack_push(parser, true);
19344 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19345 pm_accepts_block_stack_pop(parser);
19346 }
19347
// With a rescue/ensure/else clause the statements are replaced by the result
// of parse_rescues_implicit_begin; otherwise only the indentation-mismatch
// warning check runs here.
19348 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19349 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19350 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19351 } else {
19352 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19353 }
19354
// The locals must be read from the current scope before that scope is popped.
19355 pm_constant_id_list_t locals;
19356 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19357
19358 pm_parser_scope_pop(parser);
19359 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19360
// Reported after the whole module has been parsed; the node is still created.
19361 if (context_def_p(parser)) {
19362 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19363 }
19364
19365 pop_block_exits(parser, previous_block_exits);
19366 pm_node_list_free(&current_block_exits);
19367
// parser->previous is passed as the end keyword token.
19368 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
19369 }
// Prefix parse for PM_TOKEN_KEYWORD_NIL: consume the keyword and build a nil
// node from the token that was just consumed (parser->previous).
19370 case PM_TOKEN_KEYWORD_NIL:
19371 parser_lex(parser);
19372 return UP(pm_nil_node_create(parser, &parser->previous));
// Prefix parse for PM_TOKEN_KEYWORD_REDO: consume the keyword and build a
// redo node from it.
19373 case PM_TOKEN_KEYWORD_REDO: {
19374 parser_lex(parser);
19375
19376 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
// parse_block_exit is skipped entirely when the parser is handling a partial
// script.
// NOTE(review): presumably parse_block_exit validates/records that the redo
// appears in a legal context -- confirm against its definition.
19377 if (!parser->partial_script) parse_block_exit(parser, node);
19378
19379 return node;
19380 }
// Prefix parse for PM_TOKEN_KEYWORD_RETRY: consume the keyword, build a retry
// node from it and hand the node to parse_retry before returning it.
// NOTE(review): parse_retry presumably checks that the retry is in a valid
// context -- confirm against its definition.
19381 case PM_TOKEN_KEYWORD_RETRY: {
19382 parser_lex(parser);
19383
19384 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19385 parse_retry(parser, node);
19386
19387 return node;
19388 }
// Prefix parse for PM_TOKEN_KEYWORD_SELF: consume the keyword and build a
// self node from the token that was just consumed (parser->previous).
19389 case PM_TOKEN_KEYWORD_SELF:
19390 parser_lex(parser);
19391 return UP(pm_self_node_create(parser, &parser->previous));
// Prefix parse for PM_TOKEN_KEYWORD_TRUE: consume the keyword and build a
// true node from the token that was just consumed (parser->previous).
19392 case PM_TOKEN_KEYWORD_TRUE:
19393 parser_lex(parser);
19394 return UP(pm_true_node_create(parser, &parser->previous));
// Prefix parse for PM_TOKEN_KEYWORD_UNTIL. Parses the predicate, an optional
// PM_TOKEN_KEYWORD_DO_LOOP keyword (or a newline/semicolon), the body
// statements and the closing PM_TOKEN_KEYWORD_END, then returns the node
// built by pm_until_node_create with flags 0.
19395 case PM_TOKEN_KEYWORD_UNTIL: {
19396 size_t opening_newline_index = token_newline_index(parser);
19397
// The loop-predicate context and the do-loop stack entry are pushed before
// the keyword is consumed and stay active while the predicate is parsed.
// NOTE(review): presumably the do-loop entry makes a `do` after the predicate
// lex as PM_TOKEN_KEYWORD_DO_LOOP -- confirm in the lexer.
19398 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19399 pm_do_loop_stack_push(parser, true);
19400
19401 parser_lex(parser);
19402 pm_token_t keyword = parser->previous;
19403 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19404
19405 pm_do_loop_stack_pop(parser);
19406 context_pop(parser);
19407
// do_keyword stays zeroed when the predicate is terminated by a newline or
// semicolon instead of a do keyword.
19408 pm_token_t do_keyword = { 0 };
19409 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19410 do_keyword = parser->previous;
19411 } else {
19412 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19413 }
19414
// An empty body leaves statements as NULL. After a non-empty body an optional
// trailing newline or semicolon is consumed.
19415 pm_statements_node_t *statements = NULL;
19416 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19417 pm_accepts_block_stack_push(parser, true);
19418 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19419 pm_accepts_block_stack_pop(parser);
19420 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19421 }
19422
19423 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19424 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19425
// parser->previous is passed as the closing (end keyword) token.
19426 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19427 }
// Prefix parse for PM_TOKEN_KEYWORD_WHILE. Parses the predicate, an optional
// PM_TOKEN_KEYWORD_DO_LOOP keyword (or a newline/semicolon), the body
// statements and the closing PM_TOKEN_KEYWORD_END, then returns the node
// built by pm_while_node_create with flags 0.
19428 case PM_TOKEN_KEYWORD_WHILE: {
19429 size_t opening_newline_index = token_newline_index(parser);
19430
// The loop-predicate context and the do-loop stack entry are pushed before
// the keyword is consumed and stay active while the predicate is parsed.
// NOTE(review): presumably the do-loop entry makes a `do` after the predicate
// lex as PM_TOKEN_KEYWORD_DO_LOOP -- confirm in the lexer.
19431 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19432 pm_do_loop_stack_push(parser, true);
19433
19434 parser_lex(parser);
19435 pm_token_t keyword = parser->previous;
19436 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19437
19438 pm_do_loop_stack_pop(parser);
19439 context_pop(parser);
19440
// do_keyword stays zeroed when the predicate is terminated by a newline or
// semicolon instead of a do keyword.
19441 pm_token_t do_keyword = { 0 };
19442 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19443 do_keyword = parser->previous;
19444 } else {
19445 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19446 }
19447
// An empty body leaves statements as NULL. After a non-empty body an optional
// trailing newline or semicolon is consumed.
19448 pm_statements_node_t *statements = NULL;
19449 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19450 pm_accepts_block_stack_push(parser, true);
19451 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19452 pm_accepts_block_stack_pop(parser);
19453 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19454 }
19455
19456 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19457 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19458
// parser->previous is passed as the closing (end keyword) token.
19459 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
19460 }
// Prefix parse for PM_TOKEN_PERCENT_LOWER_I (a `%i` symbol list). Builds an
// array node whose elements are symbol nodes, or interpolated symbol nodes
// when one element is made of several consecutive string content tokens.
19461 case PM_TOKEN_PERCENT_LOWER_I: {
19462 parser_lex(parser);
19463 pm_token_t opening = parser->previous;
19464 pm_array_node_t *array = pm_array_node_create(parser, &opening);
// The element currently being assembled; NULL between elements.
19465 pm_node_t *current = NULL;
19466
19467 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
// Skip one optional separator; a separator directly before the terminator
// ends the list.
19468 accept1(parser, PM_TOKEN_WORDS_SEP);
19469 if (match1(parser, PM_TOKEN_STRING_END)) break;
19470
19471 // Interpolation is not possible but nested heredocs can still lead to
19472 // consecutive (disjoint) string tokens when the final newline is escaped.
19473 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19474 // Record the string node, moving to interpolation if needed.
19475 if (current == NULL) {
19476 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19477 parser_lex(parser);
19478 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19479 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19480 parser_lex(parser);
19481 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19482 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
// Second content token for the same element: promote the plain symbol to an
// interpolated symbol made of two string parts. The first part is rebuilt
// from the symbol's value location and takes over its unescaped string.
19483 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19484 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
19485 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
// The second part must be built from the token being consumed now
// (parser->current), exactly like the two branches above. Using
// parser->previous here would give it the location of the token that already
// produced the first part.
19486 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19487 parser_lex(parser);
19488
19489 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19490 pm_interpolated_symbol_node_append(interpolated, first_string);
19491 pm_interpolated_symbol_node_append(interpolated, second_string);
19492
// Only the symbol node's own storage is released; its unescaped string was
// handed to first_string above.
19493 xfree_sized(current, sizeof(pm_symbol_node_t));
19494 current = UP(interpolated);
19495 } else {
19496 assert(false && "unreachable");
19497 }
19498 }
19499
// Either flush the assembled element, or (when no content token was seen at
// all) report PM_ERR_LIST_I_LOWER_ELEMENT through expect1.
19500 if (current) {
19501 pm_array_node_elements_append(array, current);
19502 current = NULL;
19503 } else {
19504 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19505 }
19506 }
19507
// At EOF the list is unterminated: report the error on the opening token and
// close the array with a zero-width token at the end of the previous token.
19508 pm_token_t closing = parser->current;
19509 if (match1(parser, PM_TOKEN_EOF)) {
19510 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19511 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19512 } else {
19513 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19514 }
19515 pm_array_node_close_set(parser, array, &closing);
19516
19517 return UP(array);
19518 }
// Prefix parse for PM_TOKEN_PERCENT_UPPER_I (a `%I` symbol list, which allows
// interpolation). Builds an array node whose elements are symbol nodes or
// interpolated symbol nodes, dispatching on each token between the opening
// and the PM_TOKEN_STRING_END / PM_TOKEN_EOF that ends the list.
19519 case PM_TOKEN_PERCENT_UPPER_I: {
19520 parser_lex(parser);
19521 pm_token_t opening = parser->previous;
19522 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19523
19524 // This is the current node that we are parsing that will be added to the
19525 // list of elements.
19526 pm_node_t *current = NULL;
19527
19528 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19529 switch (parser->current.type) {
19530 case PM_TOKEN_WORDS_SEP: {
19531 if (current == NULL) {
19532 // If we hit a separator before we have any content, then we don't
19533 // need to do anything.
19534 } else {
19535 // If we hit a separator after we've hit content, then we need to
19536 // append that content to the list and reset the current node.
19537 pm_array_node_elements_append(array, current);
19538 current = NULL;
19539 }
19540
19541 parser_lex(parser);
19542 break;
19543 }
19544 case PM_TOKEN_STRING_CONTENT: {
19545 if (current == NULL) {
19546 // If we hit content and the current node is NULL, then this is
19547 // the first string content we've seen. In that case we're going
19548 // to create a new string node and set that to the current.
19549 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
19550 parser_lex(parser);
19551 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19552 // If we hit string content and the current node is an
19553 // interpolated string, then we need to append the string content
19554 // to the list of child nodes.
19555 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19556 parser_lex(parser);
19557
19558 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19559 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19560 // If we hit string content and the current node is a symbol node,
19561 // then we need to convert the current node into an interpolated
19562 // string and add the string content to the list of child nodes.
19563 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19564 pm_token_t content = {
19565 .type = PM_TOKEN_STRING_CONTENT,
19566 .start = parser->start + cast->value_loc.start,
19567 .end = parser->start + cast->value_loc.start + cast->value_loc.length
19568 };
19569
19570 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
// The second part must be built from the token being consumed now
// (parser->current), exactly like the two branches above. Using
// parser->previous here would give it the location of the token that already
// produced the first part.
19571 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19572 parser_lex(parser);
19573
19574 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19575 pm_interpolated_symbol_node_append(interpolated, first_string);
19576 pm_interpolated_symbol_node_append(interpolated, second_string);
19577
// Only the symbol node's own storage is released; its unescaped string was
// handed to first_string above.
19578 xfree_sized(current, sizeof(pm_symbol_node_t));
19579 current = UP(interpolated);
19580 } else {
19581 assert(false && "unreachable");
19582 }
19583
19584 break;
19585 }
19586 case PM_TOKEN_EMBVAR: {
// Tracks whether the interpolated node's start was already set from a
// leading string part, so it is not overwritten with the embedded part.
19587 bool start_location_set = false;
19588 if (current == NULL) {
19589 // If we hit an embedded variable and the current node is NULL,
19590 // then this is the start of a new string. We'll set the current
19591 // node to a new interpolated string.
19592 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19593 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19594 // If we hit an embedded variable and the current node is a string
19595 // node, then we'll convert the current into an interpolated
19596 // string and add the string node to the list of parts.
19597 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19598
19599 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19600 pm_interpolated_symbol_node_append(interpolated, current);
19601 PM_NODE_START_SET_NODE(interpolated, current);
19602 start_location_set = true;
19603 current = UP(interpolated);
19604 } else {
19605 // If we hit an embedded variable and the current node is an
19606 // interpolated string, then we'll just add the embedded variable.
19607 }
19608
19609 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19610 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19611 if (!start_location_set) {
19612 PM_NODE_START_SET_NODE(current, part);
19613 }
19614 break;
19615 }
19616 case PM_TOKEN_EMBEXPR_BEGIN: {
// Same start-location bookkeeping as the PM_TOKEN_EMBVAR case above.
19617 bool start_location_set = false;
19618 if (current == NULL) {
19619 // If we hit an embedded expression and the current node is NULL,
19620 // then this is the start of a new string. We'll set the current
19621 // node to a new interpolated string.
19622 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19623 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19624 // If we hit an embedded expression and the current node is a
19625 // string node, then we'll convert the current into an
19626 // interpolated string and add the string node to the list of
19627 // parts.
19628 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
19629
19630 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
19631 pm_interpolated_symbol_node_append(interpolated, current);
19632 PM_NODE_START_SET_NODE(interpolated, current);
19633 start_location_set = true;
19634 current = UP(interpolated);
19635 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19636 // If we hit an embedded expression and the current node is an
19637 // interpolated string, then we'll just continue on.
19638 } else {
19639 assert(false && "unreachable");
19640 }
19641
19642 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19643 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19644 if (!start_location_set) {
19645 PM_NODE_START_SET_NODE(current, part);
19646 }
19647 break;
19648 }
19649 default:
// Any other token: expect1 is called for PM_TOKEN_STRING_CONTENT (which the
// current token is not, so PM_ERR_LIST_I_UPPER_ELEMENT applies), then the
// parser advances one token so the loop makes progress.
19650 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19651 parser_lex(parser);
19652 break;
19653 }
19654 }
19655
19656 // If we have a current node, then we need to append it to the list.
19657 if (current) {
19658 pm_array_node_elements_append(array, current);
19659 }
19660
// At EOF the list is unterminated: report the error on the opening token and
// close the array with a zero-width token at the end of the previous token.
19661 pm_token_t closing = parser->current;
19662 if (match1(parser, PM_TOKEN_EOF)) {
19663 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19664 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19665 } else {
19666 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19667 }
19668 pm_array_node_close_set(parser, array, &closing);
19669
19670 return UP(array);
19671 }
// Prefix parse for PM_TOKEN_PERCENT_LOWER_W (a `%w` word list). Builds an
// array node whose elements are string nodes, or interpolated string nodes
// when one element is made of several consecutive string content tokens.
19672 case PM_TOKEN_PERCENT_LOWER_W: {
19673 parser_lex(parser);
19674 pm_token_t opening = parser->previous;
19675 pm_array_node_t *array = pm_array_node_create(parser, &opening);
// The element currently being assembled; NULL between elements.
19676 pm_node_t *current = NULL;
19677
19678 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
// Skip one optional separator; a separator directly before the terminator
// ends the list.
19679 accept1(parser, PM_TOKEN_WORDS_SEP);
19680 if (match1(parser, PM_TOKEN_STRING_END)) break;
19681
19682 // Interpolation is not possible but nested heredocs can still lead to
19683 // consecutive (disjoint) string tokens when the final newline is escaped.
19684 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// The string node is always built from the current token before the parser
// advances at the bottom of this loop body.
19685 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19686
19687 // Record the string node, moving to interpolation if needed.
19688 if (current == NULL) {
19689 current = string;
19690 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19691 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19692 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
// Second content token for the same element: wrap the existing string node
// and the new one in a fresh interpolated string node.
19693 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19694 pm_interpolated_string_node_append(interpolated, current);
19695 pm_interpolated_string_node_append(interpolated, string);
19696 current = UP(interpolated);
19697 } else {
19698 assert(false && "unreachable");
19699 }
19700 parser_lex(parser);
19701 }
19702
// Either flush the assembled element, or (when no content token was seen at
// all) report PM_ERR_LIST_W_LOWER_ELEMENT through expect1.
19703 if (current) {
19704 pm_array_node_elements_append(array, current);
19705 current = NULL;
19706 } else {
19707 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19708 }
19709 }
19710
// At EOF the list is unterminated: report the error on the opening token and
// close the array with a zero-width token at the end of the previous token.
19711 pm_token_t closing = parser->current;
19712 if (match1(parser, PM_TOKEN_EOF)) {
19713 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19714 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19715 } else {
19716 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19717 }
19718
19719 pm_array_node_close_set(parser, array, &closing);
19720 return UP(array);
19721 }
19722 case PM_TOKEN_PERCENT_UPPER_W: {
19723 parser_lex(parser);
19724 pm_token_t opening = parser->previous;
19725 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19726
19727 // This is the current node that we are parsing that will be added
19728 // to the list of elements.
19729 pm_node_t *current = NULL;
19730
19731 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19732 switch (parser->current.type) {
19733 case PM_TOKEN_WORDS_SEP: {
19734 // Reset the explicit encoding if we hit a separator
19735 // since each element can have its own encoding.
19736 parser->explicit_encoding = NULL;
19737
19738 if (current == NULL) {
19739 // If we hit a separator before we have any content,
19740 // then we don't need to do anything.
19741 } else {
19742 // If we hit a separator after we've hit content,
19743 // then we need to append that content to the list
19744 // and reset the current node.
19745 pm_array_node_elements_append(array, current);
19746 current = NULL;
19747 }
19748
19749 parser_lex(parser);
19750 break;
19751 }
19752 case PM_TOKEN_STRING_CONTENT: {
19753 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
19754 pm_node_flag_set(string, parse_unescaped_encoding(parser));
19755 parser_lex(parser);
19756
19757 if (current == NULL) {
19758 // If we hit content and the current node is NULL,
19759 // then this is the first string content we've seen.
19760 // In that case we're going to create a new string
19761 // node and set that to the current.
19762 current = string;
19763 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19764 // If we hit string content and the current node is
19765 // an interpolated string, then we need to append
19766 // the string content to the list of child nodes.
19767 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
19768 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19769 // If we hit string content and the current node is
19770 // a string node, then we need to convert the
19771 // current node into an interpolated string and add
19772 // the string content to the list of child nodes.
19773 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19774 pm_interpolated_string_node_append(interpolated, current);
19775 pm_interpolated_string_node_append(interpolated, string);
19776 current = UP(interpolated);
19777 } else {
19778 assert(false && "unreachable");
19779 }
19780
19781 break;
19782 }
19783 case PM_TOKEN_EMBVAR: {
19784 if (current == NULL) {
19785 // If we hit an embedded variable and the current
19786 // node is NULL, then this is the start of a new
19787 // string. We'll set the current node to a new
19788 // interpolated string.
19789 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19790 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19791 // If we hit an embedded variable and the current
19792 // node is a string node, then we'll convert the
19793 // current into an interpolated string and add the
19794 // string node to the list of parts.
19795 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19796 pm_interpolated_string_node_append(interpolated, current);
19797 current = UP(interpolated);
19798 } else {
19799 // If we hit an embedded variable and the current
19800 // node is an interpolated string, then we'll just
19801 // add the embedded variable.
19802 }
19803
19804 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19805 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19806 break;
19807 }
19808 case PM_TOKEN_EMBEXPR_BEGIN: {
19809 if (current == NULL) {
19810 // If we hit an embedded expression and the current
19811 // node is NULL, then this is the start of a new
19812 // string. We'll set the current node to a new
19813 // interpolated string.
19814 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19815 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19816 // If we hit an embedded expression and the current
19817 // node is a string node, then we'll convert the
19818 // current into an interpolated string and add the
19819 // string node to the list of parts.
19820 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
19821 pm_interpolated_string_node_append(interpolated, current);
19822 current = UP(interpolated);
19823 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19824 // If we hit an embedded expression and the current
19825 // node is an interpolated string, then we'll just
19826 // continue on.
19827 } else {
19828 assert(false && "unreachable");
19829 }
19830
19831 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19832 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
19833 break;
19834 }
19835 default:
19836 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19837 parser_lex(parser);
19838 break;
19839 }
19840 }
19841
19842 // If we have a current node, then we need to append it to the list.
19843 if (current) {
19844 pm_array_node_elements_append(array, current);
19845 }
19846
19847 pm_token_t closing = parser->current;
19848 if (match1(parser, PM_TOKEN_EOF)) {
19849 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19850 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19851 } else {
19852 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19853 }
19854
19855 pm_array_node_close_set(parser, array, &closing);
19856 return UP(array);
19857 }
19858 case PM_TOKEN_REGEXP_BEGIN: {
19859 pm_token_t opening = parser->current;
19860 parser_lex(parser);
19861
19862 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19863 // If we get here, then we have an end immediately after a start. In
19864 // that case we'll create an empty content token and return an
19865 // uninterpolated regular expression.
19866 pm_token_t content = (pm_token_t) {
19867 .type = PM_TOKEN_STRING_CONTENT,
19868 .start = parser->previous.end,
19869 .end = parser->previous.end
19870 };
19871
19872 parser_lex(parser);
19873
19874 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
19875 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19876
19877 return node;
19878 }
19879
19881
19882 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19883 // In this case we've hit string content so we know the regular
19884 // expression at least has something in it. We'll need to check if the
19885 // following token is the end (in which case we can return a plain
19886 // regular expression) or if it's not then it has interpolation.
19887 pm_string_t unescaped = parser->current_string;
19888 pm_token_t content = parser->current;
19889 bool ascii_only = parser->current_regular_expression_ascii_only;
19890 parser_lex(parser);
19891
19892 // If we hit an end, then we can create a regular expression
19893 // node without interpolation, which can be represented more
19894 // succinctly and more easily compiled.
19895 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19896 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
19897
19898 // If we're not immediately followed by a =~, then we want
19899 // to parse all of the errors at this point. If it is
19900 // followed by a =~, then it will get parsed higher up while
19901 // parsing the named captures as well.
19902 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19903 parse_regular_expression_errors(parser, node);
19904 }
19905
19906 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19907 return UP(node);
19908 }
19909
19910 // If we get here, then we have interpolation so we'll need to create
19911 // a regular expression node with interpolation.
19912 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19913
19914 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19915 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19916 // This is extremely strange, but the first string part of a
19917 // regular expression will always be tagged as binary if we
19918 // are in a US-ASCII file, no matter its contents.
19919 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19920 }
19921
19922 pm_interpolated_regular_expression_node_append(interpolated, part);
19923 } else {
19924 // If the first part of the body of the regular expression is not a
19925 // string content, then we have interpolation and we need to create an
19926 // interpolated regular expression node.
19927 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19928 }
19929
19930 // Now that we're here and we have interpolation, we'll parse all of the
19931 // parts into the list.
19932 pm_node_t *part;
19933 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19934 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19935 pm_interpolated_regular_expression_node_append(interpolated, part);
19936 }
19937 }
19938
19939 pm_token_t closing = parser->current;
19940 if (match1(parser, PM_TOKEN_EOF)) {
19941 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19942 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
19943 } else {
19944 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19945 }
19946
19947 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19948 return UP(interpolated);
19949 }
19950 case PM_TOKEN_BACKTICK:
19951 case PM_TOKEN_PERCENT_LOWER_X: {
19952 parser_lex(parser);
19953 pm_token_t opening = parser->previous;
19954
19955 // When we get here, we don't know if this string is going to have
19956 // interpolation or not, even though it is allowed. Still, we want to be
19957 // able to return a string node without interpolation if we can since
19958 // it'll be faster.
19959 if (match1(parser, PM_TOKEN_STRING_END)) {
19960 // If we get here, then we have an end immediately after a start. In
19961 // that case we'll create an empty content token and return an
19962 // uninterpolated string.
19963 pm_token_t content = (pm_token_t) {
19964 .type = PM_TOKEN_STRING_CONTENT,
19965 .start = parser->previous.end,
19966 .end = parser->previous.end
19967 };
19968
19969 parser_lex(parser);
19970 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
19971 }
19972
19974
19975 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19976 // In this case we've hit string content so we know the string
19977 // at least has something in it. We'll need to check if the
19978 // following token is the end (in which case we can return a
19979 // plain string) or if it's not then it has interpolation.
19980 pm_string_t unescaped = parser->current_string;
19981 pm_token_t content = parser->current;
19982 parser_lex(parser);
19983
19984 if (match1(parser, PM_TOKEN_STRING_END)) {
19985 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
19986 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19987 parser_lex(parser);
19988 return node;
19989 }
19990
19991 // If we get here, then we have interpolation so we'll need to
19992 // create a string node with interpolation.
19993 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19994
19995 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
19996 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19997
19998 pm_interpolated_xstring_node_append(node, part);
19999 } else {
20000 // If the first part of the body of the string is not a string
20001 // content, then we have interpolation and we need to create an
20002 // interpolated string node.
20003 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20004 }
20005
20006 pm_node_t *part;
20007 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20008 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20009 pm_interpolated_xstring_node_append(node, part);
20010 }
20011 }
20012
20013 pm_token_t closing = parser->current;
20014 if (match1(parser, PM_TOKEN_EOF)) {
20015 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20016 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20017 } else {
20018 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20019 }
20020 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
20021
20022 return UP(node);
20023 }
20024 case PM_TOKEN_USTAR: {
20025 parser_lex(parser);
20026
20027 // * operators at the beginning of expressions are only valid in the
20028 // context of a multiple assignment. We enforce that here. We'll
20029 // still lex past it though and create a missing node place.
20030 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20031 pm_parser_err_prefix(parser, diag_id);
20032 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20033 }
20034
20035 pm_token_t operator = parser->previous;
20036 pm_node_t *name = NULL;
20037
20038 if (token_begins_expression_p(parser->current.type)) {
20039 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20040 }
20041
20042 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
20043
20044 if (match1(parser, PM_TOKEN_COMMA)) {
20045 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20046 } else {
20047 return parse_target_validate(parser, splat, true);
20048 }
20049 }
20050 case PM_TOKEN_BANG: {
20051 if (binding_power > PM_BINDING_POWER_UNARY) {
20052 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20053 }
20054
20055 parser_lex(parser);
20056
20057 pm_token_t operator = parser->previous;
20058 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20059 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20060
20061 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20062 return UP(node);
20063 }
20064 case PM_TOKEN_TILDE: {
20065 if (binding_power > PM_BINDING_POWER_UNARY) {
20066 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20067 }
20068 parser_lex(parser);
20069
20070 pm_token_t operator = parser->previous;
20071 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20072 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20073
20074 return UP(node);
20075 }
20076 case PM_TOKEN_UMINUS: {
20077 if (binding_power > PM_BINDING_POWER_UNARY) {
20078 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20079 }
20080 parser_lex(parser);
20081
20082 pm_token_t operator = parser->previous;
20083 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20084 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20085
20086 return UP(node);
20087 }
20088 case PM_TOKEN_UMINUS_NUM: {
20089 parser_lex(parser);
20090
20091 pm_token_t operator = parser->previous;
20092 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20093
20094 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20095 pm_token_t exponent_operator = parser->previous;
20096 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20097 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20098 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20099 } else {
20100 switch (PM_NODE_TYPE(node)) {
20101 case PM_INTEGER_NODE:
20102 case PM_FLOAT_NODE:
20103 case PM_RATIONAL_NODE:
20104 case PM_IMAGINARY_NODE:
20105 parse_negative_numeric(node);
20106 break;
20107 default:
20108 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20109 break;
20110 }
20111 }
20112
20113 return node;
20114 }
20115 case PM_TOKEN_MINUS_GREATER: {
20116 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20118
20119 size_t opening_newline_index = token_newline_index(parser);
20120 pm_accepts_block_stack_push(parser, true);
20121 parser_lex(parser);
20122
20123 pm_token_t operator = parser->previous;
20124 pm_parser_scope_push(parser, false);
20125
20126 pm_block_parameters_node_t *block_parameters;
20127
20128 switch (parser->current.type) {
20129 case PM_TOKEN_PARENTHESIS_LEFT: {
20130 pm_token_t opening = parser->current;
20131 parser_lex(parser);
20132
20133 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20134 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20135 } else {
20136 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20137 }
20138
20139 accept1(parser, PM_TOKEN_NEWLINE);
20140 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20141
20142 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20143 break;
20144 }
20145 case PM_CASE_PARAMETER: {
20146 pm_accepts_block_stack_push(parser, false);
20147 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20148 pm_accepts_block_stack_pop(parser);
20149 break;
20150 }
20151 default: {
20152 block_parameters = NULL;
20153 break;
20154 }
20155 }
20156
20157 pm_token_t opening;
20158 pm_node_t *body = NULL;
20159 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20160
20161 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20162 opening = parser->previous;
20163
20164 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20165 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20166 }
20167
20168 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20169 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20170 } else {
20171 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20172 opening = parser->previous;
20173
20174 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20175 pm_accepts_block_stack_push(parser, true);
20176 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20177 pm_accepts_block_stack_pop(parser);
20178 }
20179
20180 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20181 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20182 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20183 } else {
20184 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20185 }
20186
20187 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20188 }
20189
20190 pm_constant_id_list_t locals;
20191 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20192 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20193
20194 pm_parser_scope_pop(parser);
20195 pm_accepts_block_stack_pop(parser);
20196
20197 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20198 }
20199 case PM_TOKEN_UPLUS: {
20200 if (binding_power > PM_BINDING_POWER_UNARY) {
20201 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20202 }
20203 parser_lex(parser);
20204
20205 pm_token_t operator = parser->previous;
20206 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20207 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20208
20209 return UP(node);
20210 }
20211 case PM_TOKEN_STRING_BEGIN:
20212 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20213 case PM_TOKEN_SYMBOL_BEGIN: {
20214 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20215 parser_lex(parser);
20216
20217 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20218 }
20219 default: {
20220 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20221
20222 if (recoverable != PM_CONTEXT_NONE) {
20223 parser->recovering = true;
20224
20225 // If the given error is not the generic one, then we'll add it
20226 // here because it will provide more context in addition to the
20227 // recoverable error that we will also add.
20228 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20229 pm_parser_err_prefix(parser, diag_id);
20230 }
20231
20232 // If we get here, then we are assuming this token is closing a
20233 // parent context, so we'll indicate that to the user so that
20234 // they know how we behaved.
20235 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20236 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20237 // We're going to make a special case here, because "cannot
20238 // parse expression" is pretty generic, and we know here that we
20239 // have an unexpected token.
20240 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20241 } else {
20242 pm_parser_err_prefix(parser, diag_id);
20243 }
20244
20245 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20246 }
20247 }
20248}
20249
20259static pm_node_t *
20260parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20261 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20262
20263 // Contradicting binding powers, the right-hand-side value of the assignment
20264 // allows the `rescue` modifier.
20265 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20266 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20267
20268 pm_token_t rescue = parser->current;
20269 parser_lex(parser);
20270
20271 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20272 context_pop(parser);
20273
20274 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20275 }
20276
20277 return value;
20278}
20279
20284static void
20285parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20286 switch (PM_NODE_TYPE(node)) {
20287 case PM_BEGIN_NODE: {
20288 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20289 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20290 break;
20291 }
20292 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20294 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20295 break;
20296 }
20297 case PM_PARENTHESES_NODE: {
20298 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20299 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20300 break;
20301 }
20302 case PM_STATEMENTS_NODE: {
20303 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20304 const pm_node_t *statement;
20305
20306 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20307 parse_assignment_value_local(parser, statement);
20308 }
20309 break;
20310 }
20311 default:
20312 break;
20313 }
20314}
20315
20328static pm_node_t *
20329parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20330 bool permitted = true;
20331 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20332
20333 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20334 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20335
20336 parse_assignment_value_local(parser, value);
20337 bool single_value = true;
20338
20339 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20340 single_value = false;
20341
20342 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20343 pm_array_node_elements_append(array, value);
20344 value = UP(array);
20345
20346 while (accept1(parser, PM_TOKEN_COMMA)) {
20347 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20348
20349 pm_array_node_elements_append(array, element);
20350 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20351
20352 parse_assignment_value_local(parser, element);
20353 }
20354 }
20355
20356 // Contradicting binding powers, the right-hand-side value of the assignment
20357 // allows the `rescue` modifier.
20358 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20359 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20360
20361 pm_token_t rescue = parser->current;
20362 parser_lex(parser);
20363
20364 bool accepts_command_call_inner = false;
20365
20366 // RHS can accept command call iff the value is a call with arguments
20367 // but without parenthesis.
20368 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20369 pm_call_node_t *call_node = (pm_call_node_t *) value;
20370 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20371 accepts_command_call_inner = true;
20372 }
20373 }
20374
20375 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20376 context_pop(parser);
20377
20378 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20379 }
20380
20381 return value;
20382}
20383
20391static void
20392parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20393 if (call_node->arguments != NULL) {
20394 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20395 pm_node_unreference(parser, UP(call_node->arguments));
20396 pm_node_destroy(parser, UP(call_node->arguments));
20397 call_node->arguments = NULL;
20398 }
20399
20400 if (call_node->block != NULL) {
20401 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20402 pm_node_unreference(parser, UP(call_node->block));
20403 pm_node_destroy(parser, UP(call_node->block));
20404 call_node->block = NULL;
20405 }
20406}
20407
20432
20433static inline const uint8_t *
20434pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20435 cursor++;
20436
20437 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20438 uint8_t value = escape_hexadecimal_digit(*cursor);
20439 cursor++;
20440
20441 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20442 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20443 cursor++;
20444 }
20445
20446 pm_buffer_append_byte(unescaped, value);
20447 } else {
20448 pm_buffer_append_string(unescaped, "\\x", 2);
20449 }
20450
20451 return cursor;
20452}
20453
20454static inline const uint8_t *
20455pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20456 uint8_t value = (uint8_t) (*cursor - '0');
20457 cursor++;
20458
20459 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20460 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20461 cursor++;
20462
20463 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20464 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20465 cursor++;
20466 }
20467 }
20468
20469 pm_buffer_append_byte(unescaped, value);
20470 return cursor;
20471}
20472
20473static inline const uint8_t *
20474pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20475 const uint8_t *start = cursor - 1;
20476 cursor++;
20477
20478 if (cursor >= end) {
20479 pm_buffer_append_string(unescaped, "\\u", 2);
20480 return cursor;
20481 }
20482
20483 if (*cursor != '{') {
20484 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20485 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20486
20487 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20488 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20489 }
20490
20491 return cursor + length;
20492 }
20493
20494 cursor++;
20495 for (;;) {
20496 while (cursor < end && *cursor == ' ') cursor++;
20497
20498 if (cursor >= end) break;
20499 if (*cursor == '}') {
20500 cursor++;
20501 break;
20502 }
20503
20504 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20505 if (length == 0) {
20506 break;
20507 }
20508 uint32_t value = escape_unicode(parser, cursor, length, error_location);
20509
20510 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20511 cursor += length;
20512 }
20513
20514 return cursor;
20515}
20516
20517static void
20518pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20519 const uint8_t *end = source + length;
20520 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20521
20522 for (;;) {
20523 if (++cursor >= end) {
20524 pm_buffer_append_byte(unescaped, '\\');
20525 return;
20526 }
20527
20528 switch (*cursor) {
20529 case 'x':
20530 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20531 break;
20532 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20533 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20534 break;
20535 case 'u':
20536 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20537 break;
20538 default:
20539 pm_buffer_append_byte(unescaped, '\\');
20540 break;
20541 }
20542
20543 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20544 if (next_cursor == NULL) break;
20545
20546 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20547 cursor = next_cursor;
20548 }
20549
20550 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20551}
20552
20557static void
20558parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20560
20561 pm_parser_t *parser = callback_data->parser;
20562 pm_call_node_t *call = callback_data->call;
20563 pm_constant_id_list_t *names = &callback_data->names;
20564
20565 const uint8_t *source = pm_string_source(capture);
20566 size_t length = pm_string_length(capture);
20567 pm_buffer_t unescaped = { 0 };
20568
20569 // First, we need to handle escapes within the name of the capture group.
20570 // This is because regular expressions have three different representations
20571 // in prism. The first is the plain source code. The second is the
20572 // representation that will be sent to the regular expression engine, which
20573 // is the value of the "unescaped" field. This is poorly named, because it
20574 // actually still contains escapes, just a subset of them that the regular
20575 // expression engine knows how to handle. The third representation is fully
20576 // unescaped, which is what we need.
20577 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20578 if (PRISM_UNLIKELY(cursor != NULL)) {
20579 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
20580 source = (const uint8_t *) pm_buffer_value(&unescaped);
20581 length = pm_buffer_length(&unescaped);
20582 }
20583
20584 const uint8_t *start;
20585 const uint8_t *end;
20586 pm_constant_id_t name;
20587
20588 // If the name of the capture group isn't a valid identifier, we do
20589 // not add it to the local table.
20590 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20591 pm_buffer_free(&unescaped);
20592 return;
20593 }
20594
20595 if (callback_data->shared) {
20596 // If the unescaped string is a slice of the source, then we can
20597 // copy the names directly. The pointers will line up.
20598 start = source;
20599 end = source + length;
20600 name = pm_parser_constant_id_raw(parser, start, end);
20601 } else {
20602 // Otherwise, the name is a slice of the malloc-ed owned string,
20603 // in which case we need to copy it out into a new string.
20604 start = parser->start + PM_NODE_START(call->receiver);
20605 end = parser->start + PM_NODE_END(call->receiver);
20606
20607 void *memory = xmalloc(length);
20608 if (memory == NULL) abort();
20609
20610 memcpy(memory, source, length);
20611 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20612 }
20613
20614 // Add this name to the list of constants if it is valid, not duplicated,
20615 // and not a keyword.
20616 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20617 pm_constant_id_list_append(names, name);
20618
20619 int depth;
20620 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20621 // If the local is not already a local but it is a keyword, then we
20622 // do not want to add a capture for this.
20623 if (pm_local_is_keyword((const char *) source, length)) {
20624 pm_buffer_free(&unescaped);
20625 return;
20626 }
20627
20628 // If the identifier is not already a local, then we will add it to
20629 // the local table.
20630 pm_parser_local_add(parser, name, start, end, 0);
20631 }
20632
20633 // Here we lazily create the MatchWriteNode since we know we're
20634 // about to add a target.
20635 if (callback_data->match == NULL) {
20636 callback_data->match = pm_match_write_node_create(parser, call);
20637 }
20638
20639 // Next, create the local variable target and add it to the list of
20640 // targets for the match.
20641 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20642 pm_node_list_append(&callback_data->match->targets, target);
20643 }
20644
20645 pm_buffer_free(&unescaped);
20646}
20647
20652static pm_node_t *
20653parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20655 .parser = parser,
20656 .call = call,
20657 .names = { 0 },
20658 .shared = content->type == PM_STRING_SHARED
20659 };
20660
20662 .parser = parser,
20663 .start = parser->start + PM_NODE_START(call->receiver),
20664 .end = parser->start + PM_NODE_END(call->receiver),
20665 .shared = content->type == PM_STRING_SHARED
20666 };
20667
20668 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20669 pm_constant_id_list_free(&callback_data.names);
20670
20671 if (callback_data.match != NULL) {
20672 return UP(callback_data.match);
20673 } else {
20674 return UP(call);
20675 }
20676}
20677
20678static inline pm_node_t *
20679parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20680 pm_token_t token = parser->current;
20681
20682 switch (token.type) {
20683 case PM_TOKEN_EQUAL: {
20684 switch (PM_NODE_TYPE(node)) {
20685 case PM_CALL_NODE: {
20686 // If we have no arguments to the call node and we need this
20687 // to be a target then this is either a method call or a
20688 // local variable write. This _must_ happen before the value
20689 // is parsed because it could be referenced in the value.
20690 pm_call_node_t *call_node = (pm_call_node_t *) node;
20691 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20692 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
20693 }
20694 }
20696 case PM_CASE_WRITABLE: {
20697 // When we have `it = value`, we need to add `it` as a local
20698 // variable before parsing the value, in case the value
20699 // references the variable.
20700 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20701 pm_parser_local_add_location(parser, &node->location, 0);
20702 }
20703
20704 parser_lex(parser);
20705 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20706
20707 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20708 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20709 }
20710
20711 return parse_write(parser, node, &token, value);
20712 }
20713 case PM_SPLAT_NODE: {
20714 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20715 pm_multi_target_node_targets_append(parser, multi_target, node);
20716
20717 parser_lex(parser);
20718 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20719 return parse_write(parser, UP(multi_target), &token, value);
20720 }
20721 case PM_SOURCE_ENCODING_NODE:
20722 case PM_FALSE_NODE:
20723 case PM_SOURCE_FILE_NODE:
20724 case PM_SOURCE_LINE_NODE:
20725 case PM_NIL_NODE:
20726 case PM_SELF_NODE:
20727 case PM_TRUE_NODE: {
20728 // In these special cases, we have specific error messages
20729 // and we will replace them with local variable writes.
20730 parser_lex(parser);
20731 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20732 return parse_unwriteable_write(parser, node, &token, value);
20733 }
20734 default:
20735 // In this case we have an = sign, but we don't know what
20736 // it's for. We need to treat it as an error. We'll mark it
20737 // as an error and skip past it.
20738 parser_lex(parser);
20739 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20740 return node;
20741 }
20742 }
20743 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20744 switch (PM_NODE_TYPE(node)) {
20745 case PM_BACK_REFERENCE_READ_NODE:
20746 case PM_NUMBERED_REFERENCE_READ_NODE:
20747 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20749 case PM_GLOBAL_VARIABLE_READ_NODE: {
20750 parser_lex(parser);
20751
20752 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20753 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20754
20755 pm_node_destroy(parser, node);
20756 return result;
20757 }
20758 case PM_CLASS_VARIABLE_READ_NODE: {
20759 parser_lex(parser);
20760
20761 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20762 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20763
20764 pm_node_destroy(parser, node);
20765 return result;
20766 }
20767 case PM_CONSTANT_PATH_NODE: {
20768 parser_lex(parser);
20769
20770 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20771 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20772
20773 return parse_shareable_constant_write(parser, write);
20774 }
20775 case PM_CONSTANT_READ_NODE: {
20776 parser_lex(parser);
20777
20778 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20779 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20780
20781 pm_node_destroy(parser, node);
20782 return parse_shareable_constant_write(parser, write);
20783 }
20784 case PM_INSTANCE_VARIABLE_READ_NODE: {
20785 parser_lex(parser);
20786
20787 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20788 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20789
20790 pm_node_destroy(parser, node);
20791 return result;
20792 }
20793 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20794 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20795 parser_lex(parser);
20796
20797 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20798 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20799
20800 pm_node_unreference(parser, node);
20801 pm_node_destroy(parser, node);
20802 return result;
20803 }
20804 case PM_LOCAL_VARIABLE_READ_NODE: {
20805 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20806 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
20807 pm_node_unreference(parser, node);
20808 }
20809
20811 parser_lex(parser);
20812
20813 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20814 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20815
20816 pm_node_destroy(parser, node);
20817 return result;
20818 }
20819 case PM_CALL_NODE: {
20820 pm_call_node_t *cast = (pm_call_node_t *) node;
20821
20822 // If we have a vcall (a method with no arguments and no
20823 // receiver that could have been a local variable) then we
20824 // will transform it into a local variable write.
20825 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20826 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20827 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20828 parser_lex(parser);
20829
20830 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20831 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20832
20833 pm_node_destroy(parser, UP(cast));
20834 return result;
20835 }
20836
20837 // Move past the token here so that we have already added
20838 // the local variable by this point.
20839 parser_lex(parser);
20840
20841 // If there is no call operator and the message is "[]" then
20842 // this is an aref expression, and we can transform it into
20843 // an aset expression.
20844 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20845 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20846 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
20847 }
20848
20849 // If this node cannot be writable, then we have an error.
20850 if (pm_call_node_writable_p(parser, cast)) {
20851 parse_write_name(parser, &cast->name);
20852 } else {
20853 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20854 }
20855
20856 parse_call_operator_write(parser, cast, &token);
20857 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20858 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20859 }
20860 case PM_MULTI_WRITE_NODE: {
20861 parser_lex(parser);
20862 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20863 return node;
20864 }
20865 default:
20866 parser_lex(parser);
20867
20868 // In this case we have an &&= sign, but we don't know what it's for.
20869 // We need to treat it as an error. For now, we'll mark it as an error
20870 // and just skip right past it.
20871 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
20872 return node;
20873 }
20874 }
20875 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20876 switch (PM_NODE_TYPE(node)) {
20877 case PM_BACK_REFERENCE_READ_NODE:
20878 case PM_NUMBERED_REFERENCE_READ_NODE:
20879 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20881 case PM_GLOBAL_VARIABLE_READ_NODE: {
20882 parser_lex(parser);
20883
20884 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20885 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20886
20887 pm_node_destroy(parser, node);
20888 return result;
20889 }
20890 case PM_CLASS_VARIABLE_READ_NODE: {
20891 parser_lex(parser);
20892
20893 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20894 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
20895
20896 pm_node_destroy(parser, node);
20897 return result;
20898 }
20899 case PM_CONSTANT_PATH_NODE: {
20900 parser_lex(parser);
20901
20902 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20903 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
20904
20905 return parse_shareable_constant_write(parser, write);
20906 }
20907 case PM_CONSTANT_READ_NODE: {
20908 parser_lex(parser);
20909
20910 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20911 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
20912
20913 pm_node_destroy(parser, node);
20914 return parse_shareable_constant_write(parser, write);
20915 }
20916 case PM_INSTANCE_VARIABLE_READ_NODE: {
20917 parser_lex(parser);
20918
20919 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20920 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
20921
20922 pm_node_destroy(parser, node);
20923 return result;
20924 }
20925 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20926 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
20927 parser_lex(parser);
20928
20929 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20930 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20931
20932 pm_node_unreference(parser, node);
20933 pm_node_destroy(parser, node);
20934 return result;
20935 }
20936 case PM_LOCAL_VARIABLE_READ_NODE: {
20937 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20938 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
20939 pm_node_unreference(parser, node);
20940 }
20941
20943 parser_lex(parser);
20944
20945 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20946 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
20947
20948 pm_node_destroy(parser, node);
20949 return result;
20950 }
20951 case PM_CALL_NODE: {
20952 pm_call_node_t *cast = (pm_call_node_t *) node;
20953
20954 // If we have a vcall (a method with no arguments and no
20955 // receiver that could have been a local variable) then we
20956 // will transform it into a local variable write.
20957 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20958 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
20959 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
20960 parser_lex(parser);
20961
20962 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20963 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20964
20965 pm_node_destroy(parser, UP(cast));
20966 return result;
20967 }
20968
20969 // Move past the token here so that we have already added
20970 // the local variable by this point.
20971 parser_lex(parser);
20972
20973 // If there is no call operator and the message is "[]" then
20974 // this is an aref expression, and we can transform it into
20975 // an aset expression.
20976 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20977 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20978 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20979 }
20980
20981 // If this node cannot be writable, then we have an error.
20982 if (pm_call_node_writable_p(parser, cast)) {
20983 parse_write_name(parser, &cast->name);
20984 } else {
20985 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20986 }
20987
20988 parse_call_operator_write(parser, cast, &token);
20989 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20990 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20991 }
20992 case PM_MULTI_WRITE_NODE: {
20993 parser_lex(parser);
20994 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20995 return node;
20996 }
20997 default:
20998 parser_lex(parser);
20999
21000 // In this case we have an ||= sign, but we don't know what it's for.
21001 // We need to treat it as an error. For now, we'll mark it as an error
21002 // and just skip right past it.
21003 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21004 return node;
21005 }
21006 }
21007 case PM_TOKEN_AMPERSAND_EQUAL:
21008 case PM_TOKEN_CARET_EQUAL:
21009 case PM_TOKEN_GREATER_GREATER_EQUAL:
21010 case PM_TOKEN_LESS_LESS_EQUAL:
21011 case PM_TOKEN_MINUS_EQUAL:
21012 case PM_TOKEN_PERCENT_EQUAL:
21013 case PM_TOKEN_PIPE_EQUAL:
21014 case PM_TOKEN_PLUS_EQUAL:
21015 case PM_TOKEN_SLASH_EQUAL:
21016 case PM_TOKEN_STAR_EQUAL:
21017 case PM_TOKEN_STAR_STAR_EQUAL: {
21018 switch (PM_NODE_TYPE(node)) {
21019 case PM_BACK_REFERENCE_READ_NODE:
21020 case PM_NUMBERED_REFERENCE_READ_NODE:
21021 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21023 case PM_GLOBAL_VARIABLE_READ_NODE: {
21024 parser_lex(parser);
21025
21026 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21027 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
21028
21029 pm_node_destroy(parser, node);
21030 return result;
21031 }
21032 case PM_CLASS_VARIABLE_READ_NODE: {
21033 parser_lex(parser);
21034
21035 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21036 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21037
21038 pm_node_destroy(parser, node);
21039 return result;
21040 }
21041 case PM_CONSTANT_PATH_NODE: {
21042 parser_lex(parser);
21043
21044 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21045 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21046
21047 return parse_shareable_constant_write(parser, write);
21048 }
21049 case PM_CONSTANT_READ_NODE: {
21050 parser_lex(parser);
21051
21052 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21053 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21054
21055 pm_node_destroy(parser, node);
21056 return parse_shareable_constant_write(parser, write);
21057 }
21058 case PM_INSTANCE_VARIABLE_READ_NODE: {
21059 parser_lex(parser);
21060
21061 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21062 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21063
21064 pm_node_destroy(parser, node);
21065 return result;
21066 }
21067 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21068 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21069 parser_lex(parser);
21070
21071 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21072 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21073
21074 pm_node_unreference(parser, node);
21075 pm_node_destroy(parser, node);
21076 return result;
21077 }
21078 case PM_LOCAL_VARIABLE_READ_NODE: {
21079 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21080 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21081 pm_node_unreference(parser, node);
21082 }
21083
21085 parser_lex(parser);
21086
21087 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21088 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21089
21090 pm_node_destroy(parser, node);
21091 return result;
21092 }
21093 case PM_CALL_NODE: {
21094 parser_lex(parser);
21095 pm_call_node_t *cast = (pm_call_node_t *) node;
21096
21097 // If we have a vcall (a method with no arguments and no
21098 // receiver that could have been a local variable) then we
21099 // will transform it into a local variable write.
21100 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21101 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21102 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21103 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21104 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21105
21106 pm_node_destroy(parser, UP(cast));
21107 return result;
21108 }
21109
21110 // If there is no call operator and the message is "[]" then
21111 // this is an aref expression, and we can transform it into
21112 // an aset expression.
21113 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21114 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21115 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21116 }
21117
21118 // If this node cannot be writable, then we have an error.
21119 if (pm_call_node_writable_p(parser, cast)) {
21120 parse_write_name(parser, &cast->name);
21121 } else {
21122 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21123 }
21124
21125 parse_call_operator_write(parser, cast, &token);
21126 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21127 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21128 }
21129 case PM_MULTI_WRITE_NODE: {
21130 parser_lex(parser);
21131 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21132 return node;
21133 }
21134 default:
21135 parser_lex(parser);
21136
21137 // In this case we have an operator but we don't know what it's for.
21138 // We need to treat it as an error. For now, we'll mark it as an error
21139 // and just skip right past it.
21140 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21141 return node;
21142 }
21143 }
21144 case PM_TOKEN_AMPERSAND_AMPERSAND:
21145 case PM_TOKEN_KEYWORD_AND: {
21146 parser_lex(parser);
21147
21148 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21149 return UP(pm_and_node_create(parser, node, &token, right));
21150 }
21151 case PM_TOKEN_KEYWORD_OR:
21152 case PM_TOKEN_PIPE_PIPE: {
21153 parser_lex(parser);
21154
21155 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21156 return UP(pm_or_node_create(parser, node, &token, right));
21157 }
21158 case PM_TOKEN_EQUAL_TILDE: {
21159 // Note that we _must_ parse the value before adding the local
21160 // variables in order to properly mirror the behavior of Ruby. For
21161 // example,
21162 //
21163 // /(?<foo>bar)/ =~ foo
21164 //
21165 // In this case, `foo` should be a method call and not a local yet.
21166 parser_lex(parser);
21167 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21168
21169 // By default, we're going to create a call node and then return it.
21170 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21171 pm_node_t *result = UP(call);
21172
21173 // If the receiver of this =~ is a regular expression node, then we
21174 // need to introduce local variables for it based on its named
21175 // capture groups.
21176 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21177 // It's possible to have an interpolated regular expression node
21178 // that only contains strings. This is because it can be split
21179 // up by a heredoc. In this case we need to concat the unescaped
21180 // strings together and then parse them as a regular expression.
21182
21183 bool interpolated = false;
21184 size_t total_length = 0;
21185
21186 pm_node_t *part;
21187 PM_NODE_LIST_FOREACH(parts, index, part) {
21188 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21189 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21190 } else {
21191 interpolated = true;
21192 break;
21193 }
21194 }
21195
21196 if (!interpolated && total_length > 0) {
21197 void *memory = xmalloc(total_length);
21198 if (!memory) abort();
21199
21200 uint8_t *cursor = memory;
21201 PM_NODE_LIST_FOREACH(parts, index, part) {
21202 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21203 size_t length = pm_string_length(unescaped);
21204
21205 memcpy(cursor, pm_string_source(unescaped), length);
21206 cursor += length;
21207 }
21208
21209 pm_string_t owned;
21210 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21211
21212 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21213 pm_string_free(&owned);
21214 }
21215 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21216 // If we have a regular expression node, then we can just parse
21217 // the named captures directly off the unescaped string.
21218 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21219 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21220 }
21221
21222 return result;
21223 }
21224 case PM_TOKEN_UAMPERSAND:
21225 case PM_TOKEN_USTAR:
21226 case PM_TOKEN_USTAR_STAR:
21227 // The only times this will occur are when we are in an error state,
21228 // but we'll put them in here so that errors can propagate.
21229 case PM_TOKEN_BANG_EQUAL:
21230 case PM_TOKEN_BANG_TILDE:
21231 case PM_TOKEN_EQUAL_EQUAL:
21232 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21233 case PM_TOKEN_LESS_EQUAL_GREATER:
21234 case PM_TOKEN_CARET:
21235 case PM_TOKEN_PIPE:
21236 case PM_TOKEN_AMPERSAND:
21237 case PM_TOKEN_GREATER_GREATER:
21238 case PM_TOKEN_LESS_LESS:
21239 case PM_TOKEN_MINUS:
21240 case PM_TOKEN_PLUS:
21241 case PM_TOKEN_PERCENT:
21242 case PM_TOKEN_SLASH:
21243 case PM_TOKEN_STAR:
21244 case PM_TOKEN_STAR_STAR: {
21245 parser_lex(parser);
21246 pm_token_t operator = parser->previous;
21247 switch (PM_NODE_TYPE(node)) {
21248 case PM_RESCUE_MODIFIER_NODE: {
21250 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21251 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21252 }
21253 break;
21254 }
21255 case PM_AND_NODE: {
21256 pm_and_node_t *cast = (pm_and_node_t *) node;
21257 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21258 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21259 }
21260 break;
21261 }
21262 case PM_OR_NODE: {
21263 pm_or_node_t *cast = (pm_or_node_t *) node;
21264 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21265 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21266 }
21267 break;
21268 }
21269 default:
21270 break;
21271 }
21272
21273 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21274 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21275 }
21276 case PM_TOKEN_GREATER:
21277 case PM_TOKEN_GREATER_EQUAL:
21278 case PM_TOKEN_LESS:
21279 case PM_TOKEN_LESS_EQUAL: {
21280 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21281 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21282 }
21283
21284 parser_lex(parser);
21285 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21286 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21287 }
21288 case PM_TOKEN_AMPERSAND_DOT:
21289 case PM_TOKEN_DOT: {
21290 parser_lex(parser);
21291 pm_token_t operator = parser->previous;
21292 pm_arguments_t arguments = { 0 };
21293
21294 // This if statement handles the foo.() syntax.
21295 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21296 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21297 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21298 }
21299
21300 switch (PM_NODE_TYPE(node)) {
21301 case PM_RESCUE_MODIFIER_NODE: {
21303 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21304 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21305 }
21306 break;
21307 }
21308 case PM_AND_NODE: {
21309 pm_and_node_t *cast = (pm_and_node_t *) node;
21310 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21311 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21312 }
21313 break;
21314 }
21315 case PM_OR_NODE: {
21316 pm_or_node_t *cast = (pm_or_node_t *) node;
21317 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21318 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21319 }
21320 break;
21321 }
21322 default:
21323 break;
21324 }
21325
21326 pm_token_t message;
21327
21328 switch (parser->current.type) {
21329 case PM_CASE_OPERATOR:
21330 case PM_CASE_KEYWORD:
21331 case PM_TOKEN_CONSTANT:
21332 case PM_TOKEN_IDENTIFIER:
21333 case PM_TOKEN_METHOD_NAME: {
21334 parser_lex(parser);
21335 message = parser->previous;
21336 break;
21337 }
21338 default: {
21339 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21340 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21341 }
21342 }
21343
21344 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21345 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21346
21347 if (
21348 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21349 arguments.arguments == NULL &&
21350 arguments.opening_loc.length == 0 &&
21351 match1(parser, PM_TOKEN_COMMA)
21352 ) {
21353 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21354 } else {
21355 return UP(call);
21356 }
21357 }
21358 case PM_TOKEN_DOT_DOT:
21359 case PM_TOKEN_DOT_DOT_DOT: {
21360 parser_lex(parser);
21361
21362 pm_node_t *right = NULL;
21363 if (token_begins_expression_p(parser->current.type)) {
21364 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21365 }
21366
21367 return UP(pm_range_node_create(parser, node, &token, right));
21368 }
21369 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21370 pm_token_t keyword = parser->current;
21371 parser_lex(parser);
21372
21373 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21374 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21375 }
21376 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21377 pm_token_t keyword = parser->current;
21378 parser_lex(parser);
21379
21380 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21381 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21382 }
21383 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21384 parser_lex(parser);
21385 pm_statements_node_t *statements = pm_statements_node_create(parser);
21386 pm_statements_node_body_append(parser, statements, node, true);
21387
21388 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21389 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21390 }
21391 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21392 parser_lex(parser);
21393 pm_statements_node_t *statements = pm_statements_node_create(parser);
21394 pm_statements_node_body_append(parser, statements, node, true);
21395
21396 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21397 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21398 }
21399 case PM_TOKEN_QUESTION_MARK: {
21400 context_push(parser, PM_CONTEXT_TERNARY);
21401 pm_node_list_t current_block_exits = { 0 };
21402 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21403
21404 pm_token_t qmark = parser->current;
21405 parser_lex(parser);
21406
21407 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21408
21409 if (parser->recovering) {
21410 // If parsing the true expression of this ternary resulted in a syntax
21411 // error that we can recover from, then we're going to put missing nodes
21412 // and tokens into the remaining places. We want to be sure to do this
21413 // before the `expect` function call to make sure it doesn't
21414 // accidentally move past a ':' token that occurs after the syntax
21415 // error.
21416 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21417 pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21418
21419 context_pop(parser);
21420 pop_block_exits(parser, previous_block_exits);
21421 pm_node_list_free(&current_block_exits);
21422
21423 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21424 }
21425
21426 accept1(parser, PM_TOKEN_NEWLINE);
21427 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21428
21429 pm_token_t colon = parser->previous;
21430 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21431
21432 context_pop(parser);
21433 pop_block_exits(parser, previous_block_exits);
21434 pm_node_list_free(&current_block_exits);
21435
21436 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21437 }
21438 case PM_TOKEN_COLON_COLON: {
21439 parser_lex(parser);
21440 pm_token_t delimiter = parser->previous;
21441
21442 switch (parser->current.type) {
21443 case PM_TOKEN_CONSTANT: {
21444 parser_lex(parser);
21445 pm_node_t *path;
21446
21447 if (
21448 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21449 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21450 ) {
21451 // If we have a constant immediately following a '::' operator, then
21452 // this can either be a constant path or a method call, depending on
21453 // what follows the constant.
21454 //
21455 // If we have parentheses, then this is a method call. That would
21456 // look like Foo::Bar().
21457 pm_token_t message = parser->previous;
21458 pm_arguments_t arguments = { 0 };
21459
21460 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21461 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21462 } else {
21463 // Otherwise, this is a constant path. That would look like Foo::Bar.
21464 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21465 }
21466
21467 // If this is followed by a comma then it is a multiple assignment.
21468 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21469 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21470 }
21471
21472 return path;
21473 }
21474 case PM_CASE_OPERATOR:
21475 case PM_CASE_KEYWORD:
21476 case PM_TOKEN_IDENTIFIER:
21477 case PM_TOKEN_METHOD_NAME: {
21478 parser_lex(parser);
21479 pm_token_t message = parser->previous;
21480
21481 // If we have an identifier following a '::' operator, then it is for
21482 // sure a method call.
21483 pm_arguments_t arguments = { 0 };
21484 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21485 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21486
21487 // If this is followed by a comma then it is a multiple assignment.
21488 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21489 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21490 }
21491
21492 return UP(call);
21493 }
21494 case PM_TOKEN_PARENTHESIS_LEFT: {
21495 // If we have a parenthesis following a '::' operator, then it is the
21496 // method call shorthand. That would look like Foo::(bar).
21497 pm_arguments_t arguments = { 0 };
21498 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21499
21500 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21501 }
21502 default: {
21503 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21504 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21505 }
21506 }
21507 }
21508 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21509 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21510 parser_lex(parser);
21511 accept1(parser, PM_TOKEN_NEWLINE);
21512
21513 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21514 context_pop(parser);
21515
21516 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21517 }
21518 case PM_TOKEN_BRACKET_LEFT: {
21519 parser_lex(parser);
21520
21521 pm_arguments_t arguments = { 0 };
21522 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21523
21524 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21525 pm_accepts_block_stack_push(parser, true);
21526 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21527 pm_accepts_block_stack_pop(parser);
21528 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21529 }
21530
21531 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21532
21533 // If we have a comma after the closing bracket then this is a multiple
21534 // assignment and we should parse the targets.
21535 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21536 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21537 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21538 }
21539
21540 // If we're at the end of the arguments, we can now check if there is a
21541 // block node that starts with a {. If there is, then we can parse it and
21542 // add it to the arguments.
21543 pm_block_node_t *block = NULL;
21544 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21545 block = parse_block(parser, (uint16_t) (depth + 1));
21546 pm_arguments_validate_block(parser, &arguments, block);
21547 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21548 block = parse_block(parser, (uint16_t) (depth + 1));
21549 }
21550
21551 if (block != NULL) {
21552 if (arguments.block != NULL) {
21553 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21554 if (arguments.arguments == NULL) {
21555 arguments.arguments = pm_arguments_node_create(parser);
21556 }
21557 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21558 }
21559
21560 arguments.block = UP(block);
21561 }
21562
21563 return UP(pm_call_node_aref_create(parser, node, &arguments));
21564 }
21565 case PM_TOKEN_KEYWORD_IN: {
21566 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21567 parser->pattern_matching_newlines = true;
21568
21569 pm_token_t operator = parser->current;
21570 parser->command_start = false;
21571 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21572 parser_lex(parser);
21573
21574 pm_constant_id_list_t captures = { 0 };
21575 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21576
21577 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21578 pm_constant_id_list_free(&captures);
21579
21580 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21581 }
21582 case PM_TOKEN_EQUAL_GREATER: {
21583 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21584 parser->pattern_matching_newlines = true;
21585
21586 pm_token_t operator = parser->current;
21587 parser->command_start = false;
21588 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21589 parser_lex(parser);
21590
21591 pm_constant_id_list_t captures = { 0 };
21592 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21593
21594 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21595 pm_constant_id_list_free(&captures);
21596
21597 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21598 }
21599 default:
21600 assert(false && "unreachable");
21601 return NULL;
21602 }
21603}
21604
21605#undef PM_PARSE_PATTERN_SINGLE
21606#undef PM_PARSE_PATTERN_TOP
21607#undef PM_PARSE_PATTERN_MULTI
21608
21613static inline bool
21614pm_call_node_command_p(const pm_call_node_t *node) {
21615 return (
21616 (node->opening_loc.length == 0) &&
21617 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21618 (node->arguments != NULL || node->block != NULL)
21619 );
21620}
21621
21630static pm_node_t *
21631parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21632 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21633 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21634 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21635 }
21636
21637 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21638
21639 switch (PM_NODE_TYPE(node)) {
21640 case PM_MISSING_NODE:
21641 // If we found a syntax error, then the type of node returned by
21642 // parse_expression_prefix is going to be a missing node.
21643 return node;
21644 case PM_PRE_EXECUTION_NODE:
21645 case PM_POST_EXECUTION_NODE:
21646 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21647 case PM_ALIAS_METHOD_NODE:
21648 case PM_MULTI_WRITE_NODE:
21649 case PM_UNDEF_NODE:
21650 // These expressions are statements, and cannot be followed by
21651 // operators (except modifiers).
21652 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21653 return node;
21654 }
21655 break;
21656 case PM_CALL_NODE:
21657 // If we have a call node, then we need to check if it looks like a
21658 // method call without parentheses that contains arguments. If it
21659 // does, then it has different rules for parsing infix operators,
21660 // namely that it only accepts composition (and/or) and modifiers
21661 // (if/unless/etc.).
21662 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21663 return node;
21664 }
21665 break;
21666 case PM_SYMBOL_NODE:
21667 // If we have a symbol node that is being parsed as a label, then we
21668 // need to immediately return, because there should never be an
21669 // infix operator following this node.
21670 if (pm_symbol_node_label_p(parser, node)) {
21671 return node;
21672 }
21673 break;
21674 default:
21675 break;
21676 }
21677
21678 // Otherwise we'll look and see if the next token can be parsed as an infix
21679 // operator. If it can, then we'll parse it using parse_expression_infix.
21680 pm_binding_powers_t current_binding_powers;
21681 pm_token_type_t current_token_type;
21682
21683 while (
21684 current_token_type = parser->current.type,
21685 current_binding_powers = pm_binding_powers[current_token_type],
21686 binding_power <= current_binding_powers.left &&
21687 current_binding_powers.binary
21688 ) {
21689 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21690
21691 switch (PM_NODE_TYPE(node)) {
21692 case PM_MULTI_WRITE_NODE:
21693 // Multi-write nodes are statements, and cannot be followed by
21694 // operators except modifiers.
21695 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21696 return node;
21697 }
21698 break;
21699 case PM_CLASS_VARIABLE_WRITE_NODE:
21700 case PM_CONSTANT_PATH_WRITE_NODE:
21701 case PM_CONSTANT_WRITE_NODE:
21702 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21703 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21704 case PM_LOCAL_VARIABLE_WRITE_NODE:
21705 // These expressions are statements, by virtue of the right-hand
21706 // side of their write being an implicit array.
21707 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21708 return node;
21709 }
21710 break;
21711 case PM_CALL_NODE:
21712 // These expressions are also statements, by virtue of the
21713 // right-hand side of the expression (i.e., the last argument to
21714 // the call node) being an implicit array.
21715 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21716 return node;
21717 }
21718 break;
21719 case PM_RESCUE_MODIFIER_NODE:
21720 // A rescue modifier whose handler is a one-liner pattern match
21721 // (=> or in) produces a statement. That means it cannot be
21722 // extended by operators above the modifier level.
21723 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21725 pm_node_t *rescue_expression = cast->rescue_expression;
21726
21727 if (PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE)) {
21728 return node;
21729 }
21730 }
21731 break;
21732 default:
21733 break;
21734 }
21735
21736 // If the operator is nonassoc and we should not be able to parse the
21737 // upcoming infix operator, break.
21738 if (current_binding_powers.nonassoc) {
21739 // If this is a non-assoc operator and we are about to parse the
21740 // exact same operator, then we need to add an error.
21741 if (match1(parser, current_token_type)) {
21742 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21743 break;
21744 }
21745
21746 // If this is an endless range, then we need to reject a couple of
21747 // additional operators because it violates the normal operator
21748 // precedence rules. Those patterns are:
21749 //
21750 // 1.. & 2
21751 // 1.. * 2
21752 //
21753 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21754 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21755 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21756 break;
21757 }
21758
21759 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21760 break;
21761 }
21762 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21763 break;
21764 }
21765 }
21766
21767 if (accepts_command_call) {
21768 // A command-style method call is only accepted on method chains.
21769 // Thus, we check whether the parsed node can continue method chains.
21770 // The method chain can continue if the parsed node is one of the following five kinds:
21771 // (1) index access: foo[1]
21772 // (2) attribute access: foo.bar
21773 // (3) method call with parenthesis: foo.bar(1)
21774 // (4) method call with a block: foo.bar do end
21775 // (5) constant path: foo::Bar
21776 switch (node->type) {
21777 case PM_CALL_NODE: {
21778 pm_call_node_t *cast = (pm_call_node_t *)node;
21779 if (
21780 // (1) foo[1]
21781 !(
21782 cast->call_operator_loc.length == 0 &&
21783 cast->message_loc.length > 0 &&
21784 parser->start[cast->message_loc.start] == '[' &&
21785 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
21786 ) &&
21787 // (2) foo.bar
21788 !(
21789 cast->call_operator_loc.length > 0 &&
21790 cast->arguments == NULL &&
21791 cast->block == NULL &&
21792 cast->opening_loc.length == 0
21793 ) &&
21794 // (3) foo.bar(1)
21795 !(
21796 cast->call_operator_loc.length > 0 &&
21797 cast->opening_loc.length > 0
21798 ) &&
21799 // (4) foo.bar do end
21800 !(
21801 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21802 )
21803 ) {
21804 accepts_command_call = false;
21805 }
21806 break;
21807 }
21808 // (5) foo::Bar
21809 case PM_CONSTANT_PATH_NODE:
21810 break;
21811 default:
21812 accepts_command_call = false;
21813 break;
21814 }
21815 }
21816
21817 if (context_terminator(parser->current_context->context, &parser->current)) {
21818 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
21819 if (
21820 !next_binding_powers.binary ||
21821 binding_power > next_binding_powers.left ||
21822 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
21823 ) {
21824 return node;
21825 }
21826 }
21827 }
21828
21829 return node;
21830}
21831
21836static pm_statements_node_t *
21837wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21838 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21839 if (statements == NULL) {
21840 statements = pm_statements_node_create(parser);
21841 }
21842
21843 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21844 pm_arguments_node_arguments_append(
21845 arguments,
21846 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
21847 );
21848
21849 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21850 parser,
21851 arguments,
21852 pm_parser_constant_id_constant(parser, "print", 5)
21853 )), true);
21854 }
21855
21856 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21857 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21858 if (statements == NULL) {
21859 statements = pm_statements_node_create(parser);
21860 }
21861
21862 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21863 pm_arguments_node_arguments_append(
21864 arguments,
21865 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
21866 );
21867
21868 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21869 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
21870
21871 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21872 parser,
21873 pm_parser_constant_id_constant(parser, "$F", 2),
21874 UP(call)
21875 );
21876
21877 pm_statements_node_body_prepend(statements, UP(write));
21878 }
21879
21880 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21881 pm_arguments_node_arguments_append(
21882 arguments,
21883 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
21884 );
21885
21886 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21887 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21888 pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create(
21889 parser,
21890 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
21891 NULL,
21892 UP(pm_true_node_synthesized_create(parser))
21893 )));
21894
21895 pm_arguments_node_arguments_append(arguments, UP(keywords));
21896 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21897 }
21898
21899 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21900 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21901 parser,
21902 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
21903 statements
21904 )), true);
21905
21906 statements = wrapped_statements;
21907 }
21908
21909 return statements;
21910}
21911
21915static pm_node_t *
21916parse_program(pm_parser_t *parser) {
21917 // If the current scope is NULL, then we want to push a new top level scope.
21918 // The current scope could exist in the event that we are parsing an eval
21919 // and the user has passed into scopes that already exist.
21920 if (parser->current_scope == NULL) {
21921 pm_parser_scope_push(parser, true);
21922 }
21923
21924 pm_node_list_t current_block_exits = { 0 };
21925 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21926
21927 parser_lex(parser);
21928 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
21929
21930 if (statements != NULL && !parser->parsing_eval) {
21931 // If we have statements, then the top-level statement should be
21932 // explicitly checked as well. We have to do this here because
21933 // everywhere else we check all but the last statement.
21934 assert(statements->body.size > 0);
21935 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
21936 }
21937
21938 pm_constant_id_list_t locals;
21939 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
21940 pm_parser_scope_pop(parser);
21941
21942 // At the top level, see if we need to wrap the statements in a program
21943 // node with a while loop based on the options.
21945 statements = wrap_statements(parser, statements);
21946 } else {
21947 flush_block_exits(parser, previous_block_exits);
21948 }
21949
21950 pm_node_list_free(&current_block_exits);
21951
21952 // If this is an empty file, then we're still going to parse all of the
21953 // statements in order to gather up all of the comments and such. Here we'll
21954 // correct the location information.
21955 if (statements == NULL) {
21956 statements = pm_statements_node_create(parser);
21957 statements->base.location = (pm_location_t) { 0 };
21958 }
21959
21960 return UP(pm_program_node_create(parser, &locals, statements));
21961}
21962
21963/******************************************************************************/
21964/* External functions */
21965/******************************************************************************/
21966
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be NUL-terminated.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be read.
 * @return A pointer to the first match within `big`, `big` itself when
 *     `little` is empty, or NULL when there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the start. Returning early also avoids
    // reading big[big_length], which is one past the searchable range.
    if (little_length == 0) return big;

    // A needle longer than the haystack cannot match. Checking first keeps the
    // subtraction below from forming a pointer before the start of `big`.
    if (little_length > big_length) return NULL;

    for (const char *max = big + (big_length - little_length); big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
21986
21987#ifdef _WIN32
21988#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21989#else
21995static void
21996pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
21997 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
21998 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
21999 }
22000}
22001#endif
22002
22007static void
22008pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22009 const char *switches = pm_strnstr(engine, " -", length);
22010 if (switches == NULL) return;
22011
22012 pm_options_t next_options = *options;
22013 options->shebang_callback(
22014 &next_options,
22015 (const uint8_t *) (switches + 1),
22016 length - ((size_t) (switches - engine)) - 1,
22017 options->shebang_callback_data
22018 );
22019
22020 size_t encoding_length;
22021 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22022 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22023 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22024 }
22025
22026 parser->command_line = next_options.command_line;
22027 parser->frozen_string_literal = next_options.frozen_string_literal;
22028}
22029
22034pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22035 assert(source != NULL);
22036
22037 *parser = (pm_parser_t) {
22038 .node_id = 0,
22039 .lex_state = PM_LEX_STATE_BEG,
22040 .enclosure_nesting = 0,
22041 .lambda_enclosure_nesting = -1,
22042 .brace_nesting = 0,
22043 .do_loop_stack = 0,
22044 .accepts_block_stack = 0,
22045 .lex_modes = {
22046 .index = 0,
22047 .stack = {{ .mode = PM_LEX_DEFAULT }},
22048 .current = &parser->lex_modes.stack[0],
22049 },
22050 .start = source,
22051 .end = source + size,
22052 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22053 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22054 .next_start = NULL,
22055 .heredoc_end = NULL,
22056 .data_loc = { 0 },
22057 .comment_list = { 0 },
22058 .magic_comment_list = { 0 },
22059 .warning_list = { 0 },
22060 .error_list = { 0 },
22061 .current_scope = NULL,
22062 .current_context = NULL,
22063 .encoding = PM_ENCODING_UTF_8_ENTRY,
22064 .encoding_changed_callback = NULL,
22065 .encoding_comment_start = source,
22066 .lex_callback = NULL,
22067 .filepath = { 0 },
22068 .constant_pool = { 0 },
22069 .line_offsets = { 0 },
22070 .integer_base = 0,
22071 .current_string = PM_STRING_EMPTY,
22072 .start_line = 1,
22073 .explicit_encoding = NULL,
22074 .command_line = 0,
22075 .parsing_eval = false,
22076 .partial_script = false,
22077 .command_start = true,
22078 .recovering = false,
22079 .encoding_locked = false,
22080 .encoding_changed = false,
22081 .pattern_matching_newlines = false,
22082 .in_keyword_arg = false,
22083 .current_block_exits = NULL,
22084 .semantic_token_seen = false,
22085 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22086 .current_regular_expression_ascii_only = false,
22087 .warn_mismatched_indentation = true
22088 };
22089
22090 // Initialize the constant pool. We're going to completely guess as to the
22091 // number of constants that we'll need based on the size of the input. The
22092 // ratio we chose here is actually less arbitrary than you might think.
22093 //
22094 // We took ~50K Ruby files and measured the size of the file versus the
22095 // number of constants that were found in those files. Then we found the
22096 // average and standard deviation of the ratios of constants/bytesize. Then
22097 // we added 1.34 standard deviations to the average to get a ratio that
22098 // would fit 75% of the files (for a two-tailed distribution). This works
22099 // because there was about a 0.77 correlation and the distribution was
22100 // roughly normal.
22101 //
22102 // This ratio will need to change if we add more constants to the constant
22103 // pool for another node type.
22104 uint32_t constant_size = ((uint32_t) size) / 95;
22105 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22106
22107 // Initialize the newline list. Similar to the constant pool, we're going to
22108 // guess at the number of newlines that we'll need based on the size of the
22109 // input.
22110 size_t newline_size = size / 22;
22111 pm_line_offset_list_init(&parser->line_offsets, newline_size < 4 ? 4 : newline_size);
22112
22113 // If options were provided to this parse, establish them here.
22114 if (options != NULL) {
22115 // filepath option
22116 parser->filepath = options->filepath;
22117
22118 // line option
22119 parser->start_line = options->line;
22120
22121 // encoding option
22122 size_t encoding_length = pm_string_length(&options->encoding);
22123 if (encoding_length > 0) {
22124 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22125 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22126 }
22127
22128 // encoding_locked option
22129 parser->encoding_locked = options->encoding_locked;
22130
22131 // frozen_string_literal option
22133
22134 // command_line option
22135 parser->command_line = options->command_line;
22136
22137 // version option
22138 parser->version = options->version;
22139
22140 // partial_script
22141 parser->partial_script = options->partial_script;
22142
22143 // scopes option
22144 parser->parsing_eval = options->scopes_count > 0;
22145 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22146
22147 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22148 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22149 pm_parser_scope_push(parser, scope_index == 0);
22150
22151 // Scopes given from the outside are not allowed to have numbered
22152 // parameters.
22153 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22154
22155 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22156 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22157
22158 const uint8_t *source = pm_string_source(local);
22159 size_t length = pm_string_length(local);
22160
22161 void *allocated = xmalloc(length);
22162 if (allocated == NULL) continue;
22163
22164 memcpy(allocated, source, length);
22165 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22166 }
22167 }
22168 }
22169
22170 // Now that we have established the user-provided options, check if
22171 // a version was given and parse as the latest version otherwise.
22172 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22174 }
22175
22176 pm_accepts_block_stack_push(parser, true);
22177
22178 // Skip past the UTF-8 BOM if it exists.
22179 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22180 parser->current.end += 3;
22181 parser->encoding_comment_start += 3;
22182
22183 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22185 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22186 }
22187 }
22188
22189 // If the -x command line flag is set, or the first shebang of the file does
22190 // not include "ruby", then we'll search for a shebang that does include
22191 // "ruby" and start parsing from there.
22192 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22193
22194 // If the first two bytes of the source are a shebang, then we will do a bit
22195 // of extra processing.
22196 //
22197 // First, we'll indicate that the encoding comment is at the end of the
22198 // shebang. This means that when a shebang is present the encoding comment
22199 // can begin on the second line.
22200 //
22201 // Second, we will check if the shebang includes "ruby". If it does, then we
22202 // will start parsing from there. We will also potentially warn the
22203 // user if there is a carriage return at the end of the shebang. We will
22204 // also potentially call the shebang callback if this is the main script to
22205 // allow the caller to parse the shebang and find any command-line options.
22206 // If the shebang does not include "ruby" and this is the main script being
22207 // parsed, then we will start searching the file for a shebang that does
22208 // contain "ruby" as if -x were passed on the command line.
22209 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22210 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22211
22212 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22213 const char *engine;
22214
22215 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22216 if (newline != NULL) {
22217 parser->encoding_comment_start = newline + 1;
22218
22219 if (options == NULL || options->main_script) {
22220 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22221 }
22222 }
22223
22224 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22225 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22226 }
22227
22228 search_shebang = false;
22229 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22230 search_shebang = true;
22231 }
22232 }
22233
22234 // Here we're going to find the first shebang that includes "ruby" and start
22235 // parsing from there.
22236 if (search_shebang) {
22237 // If a shebang that includes "ruby" is not found, then we're going to add
22238 // a load error to the list of errors on the parser.
22239 bool found_shebang = false;
22240
22241 // This is going to point to the start of each line as we check it.
22242 // We'll maintain a moving window looking at each line as they come.
22243 const uint8_t *cursor = parser->start;
22244
22245 // The newline pointer points to the end of the current line that we're
22246 // considering. If it is NULL, then we're at the end of the file.
22247 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22248
22249 while (newline != NULL) {
22250 pm_line_offset_list_append(&parser->line_offsets, U32(newline - parser->start + 1));
22251
22252 cursor = newline + 1;
22253 newline = next_newline(cursor, parser->end - cursor);
22254
22255 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22256 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22257 const char *engine;
22258 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22259 found_shebang = true;
22260
22261 if (newline != NULL) {
22262 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22263 parser->encoding_comment_start = newline + 1;
22264 }
22265
22266 if (options != NULL && options->shebang_callback != NULL) {
22267 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22268 }
22269
22270 break;
22271 }
22272 }
22273 }
22274
22275 if (found_shebang) {
22276 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22277 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22278 } else {
22279 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22280 pm_line_offset_list_clear(&parser->line_offsets);
22281 }
22282 }
22283
22284 // The encoding comment can start after any amount of inline whitespace, so
22285 // here we'll advance it to the first non-inline-whitespace character so
22286 // that it is ready for future comparisons.
22287 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22288}
22289
22298
22302static inline void
22303pm_comment_list_free(pm_list_t *list) {
22304 pm_list_node_t *node, *next;
22305
22306 for (node = list->head; node != NULL; node = next) {
22307 next = node->next;
22308
22309 pm_comment_t *comment = (pm_comment_t *) node;
22310 xfree_sized(comment, sizeof(pm_comment_t));
22311 }
22312}
22313
22317static inline void
22318pm_magic_comment_list_free(pm_list_t *list) {
22319 pm_list_node_t *node, *next;
22320
22321 for (node = list->head; node != NULL; node = next) {
22322 next = node->next;
22323
22325 xfree_sized(magic_comment, sizeof(pm_magic_comment_t));
22326 }
22327}
22328
22334 pm_string_free(&parser->filepath);
22335 pm_diagnostic_list_free(&parser->error_list);
22336 pm_diagnostic_list_free(&parser->warning_list);
22337 pm_comment_list_free(&parser->comment_list);
22338 pm_magic_comment_list_free(&parser->magic_comment_list);
22339 pm_constant_pool_free(&parser->constant_pool);
22340 pm_line_offset_list_free(&parser->line_offsets);
22341
22342 while (parser->current_scope != NULL) {
22343 // Normally, popping the scope doesn't free the locals since it is
22344 // assumed that ownership has transferred to the AST. However if we have
22345 // scopes while we're freeing the parser, it's likely they came from
22346 // eval scopes and we need to free them explicitly here.
22347 pm_parser_scope_pop(parser);
22348 }
22349
22350 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22351 lex_mode_pop(parser);
22352 }
22353}
22354
22360 return parse_program(parser);
22361}
22362
22368static bool
22369pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
22370#define LINE_SIZE 4096
22371 char line[LINE_SIZE];
22372
22373 while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22374 size_t length = LINE_SIZE;
22375 while (length > 0 && line[length - 1] == '\n') length--;
22376
22377 if (length == LINE_SIZE) {
22378 // If we read a line that is the maximum size and it doesn't end
22379 // with a newline, then we'll just append it to the buffer and
22380 // continue reading.
22381 length--;
22382 pm_buffer_append_string(buffer, line, length);
22383 continue;
22384 }
22385
22386 // Append the line to the buffer.
22387 length--;
22388 pm_buffer_append_string(buffer, line, length);
22389
22390 // Check if the line matches the __END__ marker. If it does, then stop
22391 // reading and return false. In most circumstances, this means we should
22392 // stop reading from the stream so that the DATA constant can pick it
22393 // up.
22394 switch (length) {
22395 case 7:
22396 if (strncmp(line, "__END__", 7) == 0) return false;
22397 break;
22398 case 8:
22399 if (strncmp(line, "__END__\n", 8) == 0) return false;
22400 break;
22401 case 9:
22402 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22403 break;
22404 }
22405
22406 // All data should be read via gets. If the string returned by gets
22407 // _doesn't_ end with a newline, then we assume we hit EOF condition.
22408 if (stream_feof(stream)) {
22409 break;
22410 }
22411 }
22412
22413 return true;
22414#undef LINE_SIZE
22415}
22416
22424pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
22425 pm_buffer_init(buffer);
22426
22427 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22428
22429 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22430 pm_node_t *node = pm_parse(parser);
22431
22432 while (!eof && parser->error_list.size > 0) {
22433 pm_node_destroy(parser, node);
22434 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22435
22436 pm_parser_free(parser);
22437 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22438 node = pm_parse(parser);
22439 }
22440
22441 return node;
22442}
22443
22448pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22449 pm_options_t options = { 0 };
22450 pm_options_read(&options, data);
22451
22452 pm_parser_t parser;
22453 pm_parser_init(&parser, source, size, &options);
22454
22455 pm_node_t *node = pm_parse(&parser);
22456 pm_node_destroy(&parser, node);
22457
22458 bool result = parser.error_list.size == 0;
22459 pm_parser_free(&parser);
22460 pm_options_free(&options);
22461
22462 return result;
22463}
22464
22465#undef PM_CASE_KEYWORD
22466#undef PM_CASE_OPERATOR
22467#undef PM_CASE_WRITABLE
22468#undef PM_STRING_EMPTY
22469
22470// We optionally support serializing to a binary string. For systems that don't
22471// want or need this functionality, it can be turned off with the
22472// PRISM_EXCLUDE_SERIALIZATION define.
22473#ifndef PRISM_EXCLUDE_SERIALIZATION
22474
22475static inline void
22476pm_serialize_header(pm_buffer_t *buffer) {
22477 pm_buffer_append_string(buffer, "PRISM", 5);
22478 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22479 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22480 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22481 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22482}
22483
22488pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22489 pm_serialize_header(buffer);
22490 pm_serialize_content(parser, node, buffer);
22491 pm_buffer_append_byte(buffer, '\0');
22492}
22493
22499pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22500 pm_options_t options = { 0 };
22501 pm_options_read(&options, data);
22502
22503 pm_parser_t parser;
22504 pm_parser_init(&parser, source, size, &options);
22505
22506 pm_node_t *node = pm_parse(&parser);
22507
22508 pm_serialize_header(buffer);
22509 pm_serialize_content(&parser, node, buffer);
22510 pm_buffer_append_byte(buffer, '\0');
22511
22512 pm_node_destroy(&parser, node);
22513 pm_parser_free(&parser);
22514 pm_options_free(&options);
22515}
22516
22522pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
22523 pm_parser_t parser;
22524 pm_options_t options = { 0 };
22525 pm_options_read(&options, data);
22526
22527 pm_buffer_t parser_buffer;
22528 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
22529 pm_serialize_header(buffer);
22530 pm_serialize_content(&parser, node, buffer);
22531 pm_buffer_append_byte(buffer, '\0');
22532
22533 pm_node_destroy(&parser, node);
22534 pm_buffer_free(&parser_buffer);
22535 pm_parser_free(&parser);
22536 pm_options_free(&options);
22537}
22538
22543pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22544 pm_options_t options = { 0 };
22545 pm_options_read(&options, data);
22546
22547 pm_parser_t parser;
22548 pm_parser_init(&parser, source, size, &options);
22549
22550 pm_node_t *node = pm_parse(&parser);
22551 pm_serialize_header(buffer);
22552 pm_serialize_encoding(parser.encoding, buffer);
22553 pm_buffer_append_varsint(buffer, parser.start_line);
22554 pm_serialize_comment_list(&parser.comment_list, buffer);
22555
22556 pm_node_destroy(&parser, node);
22557 pm_parser_free(&parser);
22558 pm_options_free(&options);
22559}
22560
22561#endif
22562
22563/******************************************************************************/
22564/* Slice queries for the Ruby API */
22565/******************************************************************************/
22566
/**
 * The classification that pm_slice_type assigns to a slice of source.
 */
typedef enum {
    /** Returned when the given encoding name could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier that is not a constant. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier starting with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22584
22588pm_slice_type_t
22589pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22590 // first, get the right encoding object
22591 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22592 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22593
22594 // check that there is at least one character
22595 if (length == 0) return PM_SLICE_TYPE_NONE;
22596
22597 size_t width;
22598 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22599 // valid because alphabetical
22600 } else if (*source == '_') {
22601 // valid because underscore
22602 width = 1;
22603 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22604 // valid because multibyte
22605 } else {
22606 // invalid because no match
22607 return PM_SLICE_TYPE_NONE;
22608 }
22609
22610 // determine the type of the slice based on the first character
22611 const uint8_t *end = source + length;
22612 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22613
22614 // next, iterate through all of the bytes of the string to ensure that they
22615 // are all valid identifier characters
22616 source += width;
22617
22618 while (source < end) {
22619 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22620 // valid because alphanumeric
22621 source += width;
22622 } else if (*source == '_') {
22623 // valid because underscore
22624 source++;
22625 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22626 // valid because multibyte
22627 source += width;
22628 } else {
22629 // invalid because no match
22630 break;
22631 }
22632 }
22633
22634 // accept a ! or ? at the end of the slice as a method name
22635 if (*source == '!' || *source == '?' || *source == '=') {
22636 source++;
22637 result = PM_SLICE_TYPE_METHOD_NAME;
22638 }
22639
22640 // valid if we are at the end of the slice
22641 return source == end ? result : PM_SLICE_TYPE_NONE;
22642}
22643
22648pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22649 switch (pm_slice_type(source, length, encoding_name)) {
22650 case PM_SLICE_TYPE_ERROR:
22651 return PM_STRING_QUERY_ERROR;
22652 case PM_SLICE_TYPE_NONE:
22653 case PM_SLICE_TYPE_CONSTANT:
22654 case PM_SLICE_TYPE_METHOD_NAME:
22655 return PM_STRING_QUERY_FALSE;
22656 case PM_SLICE_TYPE_LOCAL:
22657 return PM_STRING_QUERY_TRUE;
22658 }
22659
22660 assert(false && "unreachable");
22661 return PM_STRING_QUERY_FALSE;
22662}
22663
22668pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22669 switch (pm_slice_type(source, length, encoding_name)) {
22670 case PM_SLICE_TYPE_ERROR:
22671 return PM_STRING_QUERY_ERROR;
22672 case PM_SLICE_TYPE_NONE:
22673 case PM_SLICE_TYPE_LOCAL:
22674 case PM_SLICE_TYPE_METHOD_NAME:
22675 return PM_STRING_QUERY_FALSE;
22676 case PM_SLICE_TYPE_CONSTANT:
22677 return PM_STRING_QUERY_TRUE;
22678 }
22679
22680 assert(false && "unreachable");
22681 return PM_STRING_QUERY_FALSE;
22682}
22683
22688pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22689#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22690#define C1(c) (*source == c)
22691#define C2(s) (memcmp(source, s, 2) == 0)
22692#define C3(s) (memcmp(source, s, 3) == 0)
22693
22694 switch (pm_slice_type(source, length, encoding_name)) {
22695 case PM_SLICE_TYPE_ERROR:
22696 return PM_STRING_QUERY_ERROR;
22697 case PM_SLICE_TYPE_NONE:
22698 break;
22699 case PM_SLICE_TYPE_LOCAL:
22700 // numbered parameters are not valid method names
22701 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22702 case PM_SLICE_TYPE_CONSTANT:
22703 // all constants are valid method names
22704 case PM_SLICE_TYPE_METHOD_NAME:
22705 // all method names are valid method names
22706 return PM_STRING_QUERY_TRUE;
22707 }
22708
22709 switch (length) {
22710 case 1:
22711 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22712 case 2:
22713 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22714 case 3:
22715 return B(C3("===") || C3("<=>") || C3("[]="));
22716 default:
22717 return PM_STRING_QUERY_FALSE;
22718 }
22719
22720#undef B
22721#undef C1
22722#undef C2
22723#undef C3
22724}
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition options.c:218
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition options.c:202
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition options.c:182
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:228
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:234
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:92
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
Definition options.h:107
@ PM_OPTIONS_VERSION_CRUBY_4_1
The vendored version of prism in CRuby 4.1.x.
Definition options.h:104
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
Definition options.h:89
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
Definition options.h:95
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
Definition options.h:101
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:493
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:324
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:351
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:336
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:348
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:360
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:306
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:387
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:339
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:312
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:381
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:417
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:432
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:405
@ PM_CONTEXT_IF
an if statement
Definition parser.h:363
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:399
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:378
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:366
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:402
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
Definition parser.h:303
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:426
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:411
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:342
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:435
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:357
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:408
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:345
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:315
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:384
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:369
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:414
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:354
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:429
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:396
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:438
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:563
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:519
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:451
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition pm_buffer.c:356
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition pm_buffer.c:27
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition pm_buffer.c:43
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition pm_buffer.c:35
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition pm_string.c:352
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition pm_string.c:360
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition pm_string.c:368
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition defines.h:274
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:258
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:81
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:37
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:116
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:53
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
Definition prism.c:22359
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition prism.c:22295
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition prism.c:22333
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition prism.c:22424
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition prism.c:22034
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:267
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:275
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:269
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:272
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2147
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
Definition prism.h:105
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2124
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2054
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream() to check whether a stream is at EOF.
Definition prism.h:112
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:358
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17447
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17449
const uint8_t * start
The start of the regular expression.
Definition prism.c:17452
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17463
const uint8_t * end
The end of the regular expression.
Definition prism.c:17455
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20412
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20423
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20414
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20420
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20417
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20430
AndNode.
Definition ast.h:1277
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1292
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1305
ArgumentsNode.
Definition ast.h:1337
pm_node_t base
The embedded base node.
Definition ast.h:1339
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1349
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1658
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1669
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition prism.c:1672
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1660
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1663
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1666
ArrayNode.
Definition ast.h:1367
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1376
ArrayPatternNode.
Definition ast.h:1427
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1445
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1485
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1495
AssocNode.
Definition ast.h:1510
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1541
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1528
BeginNode.
Definition ast.h:1633
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1675
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1685
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1655
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1665
pm_node_t base
The embedded base node.
Definition ast.h:1635
This struct represents a set of binding powers used for a given token.
Definition prism.c:12234
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12242
pm_binding_power_t left
The left binding power.
Definition prism.c:12236
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:12248
pm_binding_power_t right
The right binding power.
Definition prism.c:12239
BlockLocalVariableNode.
Definition ast.h:1750
BlockNode.
Definition ast.h:1777
pm_location_t opening_loc
BlockNode::opening_loc.
Definition ast.h:1823
BlockParametersNode.
Definition ast.h:1905
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
CallNode.
Definition ast.h:2129
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2190
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2210
pm_constant_id_t name
CallNode::name.
Definition ast.h:2170
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2200
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2223
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2160
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2180
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2233
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2147
CaseMatchNode.
Definition ast.h:2564
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2586
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2596
CaseNode.
Definition ast.h:2633
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2665
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2655
ClassVariableReadNode.
Definition ast.h:2922
ClassVariableTargetNode.
Definition ast.h:2950
ClassVariableWriteNode.
Definition ast.h:2972
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:461
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:469
pm_location_t location
The location of the comment in the source.
Definition parser.h:466
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3181
ConstantPathTargetNode.
Definition ast.h:3316
ConstantReadNode.
Definition ast.h:3409
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:3437
ConstantWriteNode.
Definition ast.h:3459
This is a node in a linked list of contexts.
Definition parser.h:442
pm_context_t context
The context that this node represents.
Definition parser.h:444
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:447
ElseNode.
Definition ast.h:3635
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3647
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:3730
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3742
FindPatternNode.
Definition ast.h:3788
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3852
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3800
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3865
FlipFlopNode.
Definition ast.h:3883
FloatNode.
Definition ast.h:3915
double value
FloatNode::value.
Definition ast.h:3924
pm_node_t base
The embedded base node.
Definition ast.h:3917
ForwardingParameterNode.
Definition ast.h:4048
GlobalVariableReadNode.
Definition ast.h:4210
GlobalVariableTargetNode.
Definition ast.h:4238
GlobalVariableWriteNode.
Definition ast.h:4260
HashNode.
Definition ast.h:4321
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4346
HashPatternNode.
Definition ast.h:4380
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4395
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4434
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4447
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
IfNode.
Definition ast.h:4468
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4527
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4546
ImaginaryNode.
Definition ast.h:4573
InNode.
Definition ast.h:4649
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4661
InstanceVariableReadNode.
Definition ast.h:5052
InstanceVariableTargetNode.
Definition ast.h:5080
InstanceVariableWriteNode.
Definition ast.h:5102
IntegerNode.
Definition ast.h:5169
pm_integer_t value
IntegerNode::value.
Definition ast.h:5178
pm_node_t base
The embedded base node.
Definition ast.h:5171
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition ast.h:5206
InterpolatedRegularExpressionNode.
Definition ast.h:5251
InterpolatedStringNode.
Definition ast.h:5287
pm_node_t base
The embedded base node.
Definition ast.h:5289
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5294
InterpolatedSymbolNode.
Definition ast.h:5319
InterpolatedXStringNode.
Definition ast.h:5351
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5358
pm_node_t base
The embedded base node.
Definition ast.h:5353
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5363
KeywordHashNode.
Definition ast.h:5420
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:515
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:509
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:109
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:529
pm_constant_id_t name
The name of the local variable.
Definition parser.h:531
pm_location_t location
The location of the local variable in the source.
Definition parser.h:534
uint32_t hash
The hash of the local variable.
Definition parser.h:543
uint32_t index
The index of the local variable in the local table.
Definition parser.h:537
uint32_t reads
The number of times the local variable is read.
Definition parser.h:540
LocalVariableReadNode.
Definition ast.h:5656
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5686
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5673
LocalVariableTargetNode.
Definition ast.h:5704
LocalVariableWriteNode.
Definition ast.h:5731
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5757
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5744
This is a set of local variables in a certain lexical context (method, class, module,...
Definition parser.h:551
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:559
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:556
uint32_t size
The number of local variables in the set.
Definition parser.h:553
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:540
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:542
uint32_t length
The length of the location.
Definition ast.h:545
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:478
MatchLastLineNode.
Definition ast.h:5822
MatchWriteNode.
Definition ast.h:5977
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:5989
MissingNode.
Definition ast.h:6001
MultiTargetNode.
Definition ast.h:6070
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6127
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6087
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6137
MultiWriteNode.
Definition ast.h:6152
A list of nodes in the source, most often used for lists of children.
Definition ast.h:553
size_t size
The number of nodes in the list.
Definition ast.h:555
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:561
size_t capacity
The capacity of the list that has been allocated.
Definition ast.h:558
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1051
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1056
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1074
OptionalParameterNode.
Definition ast.h:6446
A scope of locals surrounding the code that is being parsed.
Definition options.h:36
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
The options that can be passed to the parser.
Definition options.h:113
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:162
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:124
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:178
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:185
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:139
int32_t line
The line within the file that the parse starts on.
Definition options.h:133
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:118
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:171
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:195
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:144
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:127
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:159
OrNode.
Definition ast.h:6483
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6511
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6498
ParametersNode.
Definition ast.h:6537
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6574
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6554
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6569
ParenthesesNode.
Definition ast.h:6592
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6599
This struct represents the overall parser.
Definition parser.h:637
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:837
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:646
uint8_t command_line
The command line flags given from the options.
Definition parser.h:856
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:752
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:879
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:906
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:691
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:885
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:794
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:927
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:783
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:909
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:704
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:746
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:718
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:655
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:771
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:853
pm_token_t previous
The previous token we were considering.
Definition parser.h:694
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:800
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:872
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:921
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:900
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition parser.h:725
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:737
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:688
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:649
pm_line_offset_list_t line_offsets
This is the list of line offsets in the source file.
Definition parser.h:786
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:731
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:866
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:850
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:765
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:681
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:728
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:712
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:661
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:759
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
Definition parser.h:806
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:893
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:678
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:715
size_t index
The current index into the lexer mode stack.
Definition parser.h:684
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:777
pm_scope_t * current_scope
The current local scope.
Definition parser.h:734
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:882
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:915
uint32_t node_id
The next node identifier that will be assigned.
Definition parser.h:643
RangeNode.
Definition ast.h:6822
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6851
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6837
RationalNode.
Definition ast.h:6879
pm_node_t base
The embedded base node.
Definition ast.h:6881
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6890
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9544
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9549
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9546
RegularExpressionNode.
Definition ast.h:6944
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition ast.h:6966
RequiredParameterNode.
Definition ast.h:7016
RescueModifierNode.
Definition ast.h:7038
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7055
RescueNode.
Definition ast.h:7075
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7112
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7102
This struct represents a node in a linked list of scopes.
Definition parser.h:577
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:579
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:590
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:617
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:582
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:611
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:623
SplatNode.
Definition ast.h:7365
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7377
StatementsNode.
Definition ast.h:7392
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7399
pm_node_t base
The embedded base node.
Definition ast.h:7394
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition ast.h:7426
pm_node_t base
The embedded base node.
Definition ast.h:7428
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7448
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7438
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7443
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7433
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@105 type
The type of the string.
SymbolNode.
Definition ast.h:7520
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7532
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7542
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:9518
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9523
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9529
This struct represents a token in the Ruby source.
Definition ast.h:521
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:529
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:526
pm_token_type_t type
The type of the token.
Definition ast.h:523
UndefNode.
Definition ast.h:7574
UnlessNode.
Definition ast.h:7604
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7653
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7663
WhenNode.
Definition ast.h:7738
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7760
XStringNode.
Definition ast.h:7827