Ruby 4.1.0dev (2026-04-19 revision 699c13eede8f49f4a0bd2ec3e4bcc6aea47ea8d5)
prism.c
4
5#include "prism/internal/allocator.h"
6#include "prism/internal/arena.h"
7#include "prism/internal/bit.h"
8#include "prism/internal/buffer.h"
9#include "prism/internal/char.h"
10#include "prism/internal/comments.h"
11#include "prism/internal/constant_pool.h"
12#include "prism/internal/diagnostic.h"
13#include "prism/internal/encoding.h"
14#include "prism/internal/integer.h"
15#include "prism/internal/isinf.h"
16#include "prism/internal/line_offset_list.h"
17#include "prism/internal/list.h"
18#include "prism/internal/magic_comments.h"
19#include "prism/internal/memchr.h"
20#include "prism/internal/node.h"
21#include "prism/internal/options.h"
22#include "prism/internal/parser.h"
23#include "prism/internal/regexp.h"
24#include "prism/internal/serialize.h"
25#include "prism/internal/source.h"
26#include "prism/internal/static_literals.h"
27#include "prism/internal/stringy.h"
28#include "prism/internal/strncasecmp.h"
29#include "prism/internal/strpbrk.h"
30#include "prism/internal/tokens.h"
31
32#include "prism/excludes.h"
33#include "prism/serialize.h"
34#include "prism/stream.h"
35#include "prism/version.h"
36
37#include <assert.h>
38#include <errno.h>
39#include <limits.h>
40#include <locale.h>
41#include <math.h>
42#include <stdio.h>
43#include <stdlib.h>
44
/**
 * Default maximum depth, used only when PRISM_DEPTH_MAXIMUM has not already
 * been defined before this point (for example on the compiler command line).
 * NOTE(review): presumably bounds parser nesting/recursion; the use site is not
 * visible in this part of the file - confirm.
 */
#ifndef PRISM_DEPTH_MAXIMUM
    #define PRISM_DEPTH_MAXIMUM 10000
#endif
53
/**
 * Paste two tokens together into a single token.
 */
#define PM_CONCATENATE(left, right) left ## right

/**
 * Compile-time assertion. When _Static_assert is visible as a macro, it is used
 * directly with the given message. Otherwise a typedef of a char array is
 * declared whose size is -1 (a compile error) when the condition is false. The
 * `line` argument is only used to build the fallback typedef name.
 */
#if defined(_Static_assert)
#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#else
#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
70
/**
 * Branch prediction hints. On GCC and Clang these expand to __builtin_expect
 * with the condition normalized to 0/1; on other compilers they expand to the
 * bare condition.
 */
#if defined(__GNUC__) || defined(__clang__)
    /** Hint that the condition is expected to be true. */
    #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)

    /** Hint that the condition is expected to be false. */
    #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
    /** No hint available: evaluates to the condition itself. */
    #define PRISM_LIKELY(x) (x)

    /** No hint available: evaluates to the condition itself. */
    #define PRISM_UNLIKELY(x) (x)
#endif
88
92const char *
93pm_version(void) {
94 return PRISM_VERSION;
95}
96
/**
 * NOTE(review): by its name, the number of columns a tab character counts for;
 * the use site is not visible in this part of the file - confirm.
 */
#define PM_TAB_WHITESPACE_SIZE 8

// Macros for min/max. Note that each argument may be evaluated twice, so
// arguments must not have side effects.
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
106
107/******************************************************************************/
108/* Helpful AST-related macros */
109/******************************************************************************/
110
/** Cast the given value to uint32_t. */
#define U32(value_) ((uint32_t) (value_))

/** Short aliases for PM_NODE_FLAGS and PM_NODE_UPCAST. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST
115
/** Start offset of a location (pointer to a struct with start/length). */
#define PM_LOCATION_START(location_) ((location_)->start)
/** End offset of a location: start plus length. */
#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)

/** Offset of the start of a token, relative to the parser's start pointer. */
#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
/** Offset of the end of a token, relative to the parser's start pointer. */
#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
/** Length in bytes of a single token. */
#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
/** Length from the start of the left token to the end of the right token. */
#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)

/** Start offset of a node's location (the node is upcast with UP). */
#define PM_NODE_START(node_) (UP(node_)->location.start)
/** Length of a node's location. */
#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
/** End offset of a node's location: start plus length. */
#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
/** Length from the start of the left node to the end of the right node. */
#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))

/** Length from the start of a token to the end of a node. */
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
/** Length from the start of a node to the end of a token. */
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
131
/** Set the start of the left node to the start of the right node. */
#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
/** Set the start of the node to the start offset of the token. */
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
/** Set the length of the left node so that it ends where the right node ends. */
#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
/** Set the length of the node so that it ends where the token ends. */
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
/** Set the length of the node so that it ends where the location ends. */
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))
137
/** Build a pm_location_t value from a start offset and a length. */
#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
/** The location used for "no location": start 0, length 0. */
#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
/** Build a location covering a single token. */
#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
/** The location stored on the given node. */
#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location

/** Build a location from the start of the left token to the end of the right token. */
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
/** Build a location from the start of the left node to the end of the right node. */
#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
/** Build a location from the start of a token to the end of a node. */
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
/** Build a location from the start of a node to the end of a token. */
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

/** Shorthand for PM_LOCATION_INIT_TOKEN. */
#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
/** Like TOK2LOC, but yields the unset location when the token pointer is NULL. */
#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
/** Address of the given token object, or NULL when its start pointer is NULL. */
#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
151
152/******************************************************************************/
153/* Lex mode manipulations */
154/******************************************************************************/
155
/**
 * Return the byte that increases the nesting level for the given opening
 * delimiter. Only the bracket-like delimiters `(`, `[`, `{`, and `<` nest; for
 * those the delimiter itself is returned. Any other byte yields `\0`.
 */
static PRISM_INLINE uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool nests = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return nests ? start : (uint8_t) '\0';
}
172
/**
 * Return the byte that terminates a literal opened with the given delimiter.
 * Bracket-like openers map to their closing counterpart; every other byte
 * terminates itself.
 */
static PRISM_INLINE uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
192
198static bool
199lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
200 lex_mode.prev = parser->lex_modes.current;
201 parser->lex_modes.index++;
202
203 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
204 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
205 if (parser->lex_modes.current == NULL) return false;
206
207 *parser->lex_modes.current = lex_mode;
208 } else {
209 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
210 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
211 }
212
213 return true;
214}
215
219static PRISM_INLINE bool
220lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
221 uint8_t incrementor = lex_mode_incrementor(delimiter);
222 uint8_t terminator = lex_mode_terminator(delimiter);
223
224 pm_lex_mode_t lex_mode = {
225 .mode = PM_LEX_LIST,
226 .as.list = {
227 .nesting = 0,
228 .interpolation = interpolation,
229 .incrementor = incrementor,
230 .terminator = terminator
231 }
232 };
233
234 // These are the places where we need to split up the content of the list.
235 // We'll use strpbrk to find the first of these characters.
236 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
237 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
238 memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
239 size_t index = 7;
240
241 // Now we'll add the terminator to the list of breakpoints. If the
242 // terminator is not already a NULL byte, add it to the list.
243 if (terminator != '\0') {
244 breakpoints[index++] = terminator;
245 }
246
247 // If interpolation is allowed, then we're going to check for the #
248 // character. Otherwise we'll only look for escapes and the terminator.
249 if (interpolation) {
250 breakpoints[index++] = '#';
251 }
252
253 // If there is an incrementor, then we'll check for that as well.
254 if (incrementor != '\0') {
255 breakpoints[index++] = incrementor;
256 }
257
258 parser->explicit_encoding = NULL;
259 return lex_mode_push(parser, lex_mode);
260}
261
267static PRISM_INLINE bool
268lex_mode_push_list_eof(pm_parser_t *parser) {
269 return lex_mode_push_list(parser, false, '\0');
270}
271
275static PRISM_INLINE bool
276lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
277 pm_lex_mode_t lex_mode = {
278 .mode = PM_LEX_REGEXP,
279 .as.regexp = {
280 .nesting = 0,
281 .incrementor = incrementor,
282 .terminator = terminator
283 }
284 };
285
286 // These are the places where we need to split up the content of the
287 // regular expression. We'll use strpbrk to find the first of these
288 // characters.
289 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
290 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
291 memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
292 size_t index = 4;
293
294 // First we'll add the terminator.
295 if (terminator != '\0') {
296 breakpoints[index++] = terminator;
297 }
298
299 // Next, if there is an incrementor, then we'll check for that as well.
300 if (incrementor != '\0') {
301 breakpoints[index++] = incrementor;
302 }
303
304 parser->explicit_encoding = NULL;
305 return lex_mode_push(parser, lex_mode);
306}
307
311static PRISM_INLINE bool
312lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
313 pm_lex_mode_t lex_mode = {
314 .mode = PM_LEX_STRING,
315 .as.string = {
316 .nesting = 0,
317 .interpolation = interpolation,
318 .label_allowed = label_allowed,
319 .incrementor = incrementor,
320 .terminator = terminator
321 }
322 };
323
324 // These are the places where we need to split up the content of the
325 // string. We'll use strpbrk to find the first of these characters.
326 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
327 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
328 memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
329 size_t index = 3;
330
331 // Now add in the terminator. If the terminator is not already a NULL byte,
332 // then we'll add it.
333 if (terminator != '\0') {
334 breakpoints[index++] = terminator;
335 }
336
337 // If interpolation is allowed, then we're going to check for the #
338 // character. Otherwise we'll only look for escapes and the terminator.
339 if (interpolation) {
340 breakpoints[index++] = '#';
341 }
342
343 // If we have an incrementor, then we'll add that in as a breakpoint as
344 // well.
345 if (incrementor != '\0') {
346 breakpoints[index++] = incrementor;
347 }
348
349 parser->explicit_encoding = NULL;
350 return lex_mode_push(parser, lex_mode);
351}
352
358static PRISM_INLINE bool
359lex_mode_push_string_eof(pm_parser_t *parser) {
360 return lex_mode_push_string(parser, false, false, '\0', '\0');
361}
362
368static void
369lex_mode_pop(pm_parser_t *parser) {
370 if (parser->lex_modes.index == 0) {
371 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
372 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
373 parser->lex_modes.index--;
374 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
375 } else {
376 parser->lex_modes.index--;
377 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
378 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
379 parser->lex_modes.current = prev;
380 }
381}
382
386static PRISM_INLINE bool
387lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
388 return parser->lex_state & state;
389}
390
/**
 * The classification returned by lex_state_ignored_p for the parser's current
 * lex state.
 */
typedef enum {
    /** Returned when neither of the other two conditions holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /**
     * Returned when the lex state has one of BEG/CLASS/FNAME/DOT set and does
     * not have LABELED set.
     */
    PM_IGNORED_NEWLINE_ALL,

    /**
     * Returned when the lex state, ignoring the LABEL bit, is exactly
     * ARG|LABELED.
     */
    PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
396
397static PRISM_INLINE pm_ignored_newline_type_t
398lex_state_ignored_p(pm_parser_t *parser) {
399 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
400
401 if (ignored) {
402 return PM_IGNORED_NEWLINE_ALL;
403 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
404 return PM_IGNORED_NEWLINE_PATTERN;
405 } else {
406 return PM_IGNORED_NEWLINE_NONE;
407 }
408}
409
410static PRISM_INLINE bool
411lex_state_beg_p(pm_parser_t *parser) {
412 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
413}
414
415static PRISM_INLINE bool
416lex_state_arg_p(pm_parser_t *parser) {
417 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
418}
419
420static PRISM_INLINE bool
421lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
422 if (parser->current.end >= parser->end) {
423 return false;
424 }
425 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
426}
427
428static PRISM_INLINE bool
429lex_state_end_p(pm_parser_t *parser) {
430 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
431}
432
436static PRISM_INLINE bool
437lex_state_operator_p(pm_parser_t *parser) {
438 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
439}
440
/**
 * Replace the parser's lex state with the given state.
 *
 * Note that when PM_DEBUG_LOGGING is enabled, a macro of the same name is
 * defined later in the file so that subsequent call sites are routed through
 * debug_lex_state_set.
 */
static PRISM_INLINE void
lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
    parser->lex_state = state;
}
449
#ifndef PM_DEBUG_LOGGING
/**
 * Debug logging of lex state changes is off by default. Define
 * PM_DEBUG_LOGGING to a non-zero value before this point to compile in the
 * debug helpers guarded by `#if PM_DEBUG_LOGGING`.
 */
#define PM_DEBUG_LOGGING 0
#endif
457
458#if PM_DEBUG_LOGGING
459PRISM_UNUSED static void
460debug_state(pm_parser_t *parser) {
461 fprintf(stderr, "STATE: ");
462 bool first = true;
463
464 if (parser->lex_state == PM_LEX_STATE_NONE) {
465 fprintf(stderr, "NONE\n");
466 return;
467 }
468
469#define CHECK_STATE(state) \
470 if (parser->lex_state & state) { \
471 if (!first) fprintf(stderr, "|"); \
472 fprintf(stderr, "%s", #state); \
473 first = false; \
474 }
475
476 CHECK_STATE(PM_LEX_STATE_BEG)
477 CHECK_STATE(PM_LEX_STATE_END)
478 CHECK_STATE(PM_LEX_STATE_ENDARG)
479 CHECK_STATE(PM_LEX_STATE_ENDFN)
480 CHECK_STATE(PM_LEX_STATE_ARG)
481 CHECK_STATE(PM_LEX_STATE_CMDARG)
482 CHECK_STATE(PM_LEX_STATE_MID)
483 CHECK_STATE(PM_LEX_STATE_FNAME)
484 CHECK_STATE(PM_LEX_STATE_DOT)
485 CHECK_STATE(PM_LEX_STATE_CLASS)
486 CHECK_STATE(PM_LEX_STATE_LABEL)
487 CHECK_STATE(PM_LEX_STATE_LABELED)
488 CHECK_STATE(PM_LEX_STATE_FITEM)
489
490#undef CHECK_STATE
491
492 fprintf(stderr, "\n");
493}
494
/**
 * Set the parser's lex state while logging the transition to stderr.
 *
 * Prints the calling function and line, the state before the change, and the
 * state after the change.
 *
 * @param parser The parser whose lex state is changed.
 * @param state The new lex state.
 * @param caller_name The name of the calling function (from __func__).
 * @param line_number The line of the call site (from __LINE__).
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);
    // The macro below is not defined yet at this point, so this calls the real
    // lex_state_set function rather than recursing.
    lex_state_set(parser, state);
    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

/**
 * From here on, route every lex_state_set call through the logging wrapper,
 * passing along the call site's function name and line number.
 */
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
506#endif
507
508/******************************************************************************/
509/* Command-line macro helpers */
510/******************************************************************************/
511
/** Non-zero if the given option bit is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
532
533/******************************************************************************/
534/* Diagnostic-related functions */
535/******************************************************************************/
536
/**
 * Append an error to the parser's error list.
 *
 * @param parser The parser that owns the error list and the metadata arena
 *     passed to pm_diagnostic_list_append.
 * @param start The start offset of the error's location.
 * @param length The length of the error's location.
 * @param diag_id The id of the diagnostic to append.
 */
static PRISM_INLINE void
pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
    pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
}
544
549static PRISM_INLINE void
550pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
551 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
552}
553
558static PRISM_INLINE void
559pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
560 pm_parser_err_token(parser, &parser->current, diag_id);
561}
562
567static PRISM_INLINE void
568pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
569 pm_parser_err_token(parser, &parser->previous, diag_id);
570}
571
576static PRISM_INLINE void
577pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
578 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
579}
580
/**
 * Append a formatted error to the parser's error list at the given start
 * offset and length. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted error to the parser's error list, using the location of
 * the given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
593
/**
 * Append a formatted error to the parser's error list, using the location of
 * the given node and passing the node's source text as the format arguments:
 * its length as an int followed by a pointer to its first byte.
 *
 * The parser argument is parenthesized before being dereferenced so that the
 * macro expands correctly for any pointer expression, not just a plain
 * identifier.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
600
/**
 * Append a formatted error to the parser's error list, using the location of
 * the given token.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)

/**
 * Append a formatted error to the parser's error list, using the location of
 * the given token and passing the token's source text as the format arguments:
 * its length as an int followed by a pointer to its first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
614
/**
 * Append a warning to the parser's warning list.
 *
 * @param parser The parser that owns the warning list and the metadata arena
 *     passed to pm_diagnostic_list_append.
 * @param start The start offset of the warning's location.
 * @param length The length of the warning's location.
 * @param diag_id The id of the diagnostic to append.
 */
static PRISM_INLINE void
pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
    pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
}
622
627static PRISM_INLINE void
628pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
629 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
630}
631
636static PRISM_INLINE void
637pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
638 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
639}
640
/**
 * Append a formatted warning to the parser's warning list at the given start
 * offset and length. The variadic arguments are forwarded to
 * pm_diagnostic_list_append_format.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning to the parser's warning list, using the location
 * of the given token.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning to the parser's warning list, using the location
 * of the given token and passing the token's source text as the format
 * arguments: its length as an int followed by a pointer to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)

/**
 * Append a formatted warning to the parser's warning list, using the location
 * of the given node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
668
674static void
675pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
676 PM_PARSER_ERR_FORMAT(
677 parser,
678 U32(ident_start - parser->start),
679 U32(ident_length),
680 PM_ERR_HEREDOC_TERM,
681 (int) ident_length,
682 (const char *) ident_start
683 );
684}
685
686/******************************************************************************/
687/* Scope-related functions */
688/******************************************************************************/
689
693static bool
694pm_parser_scope_push(pm_parser_t *parser, bool closed) {
695 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
696 if (scope == NULL) return false;
697
698 *scope = (pm_scope_t) {
699 .previous = parser->current_scope,
700 .locals = { 0 },
701 .parameters = PM_SCOPE_PARAMETERS_NONE,
702 .implicit_parameters = { 0 },
703 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
704 .closed = closed
705 };
706
707 parser->current_scope = scope;
708 return true;
709}
710
715static bool
716pm_parser_scope_toplevel_p(pm_parser_t *parser) {
717 pm_scope_t *scope = parser->current_scope;
718
719 do {
720 if (scope->previous == NULL) return true;
721 if (scope->closed) return false;
722 } while ((scope = scope->previous) != NULL);
723
724 assert(false && "unreachable");
725 return true;
726}
727
731static pm_scope_t *
732pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
733 pm_scope_t *scope = parser->current_scope;
734
735 while (depth-- > 0) {
736 assert(scope != NULL);
737 scope = scope->previous;
738 }
739
740 return scope;
741}
742
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /**
     * A closed scope declares the parameter and no non-closed scope walked
     * before it does.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,

    /**
     * A closed scope declares the parameter, but a non-closed scope walked
     * before it declares it as well.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,

    /**
     * The walk ended (at a closed scope or at the outermost scope) without
     * finding a closed scope that declares the parameter.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
} pm_scope_forwarding_param_check_result_t;
748
749static pm_scope_forwarding_param_check_result_t
750pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
751 pm_scope_t *scope = parser->current_scope;
752 bool conflict = false;
753
754 while (scope != NULL) {
755 if (scope->parameters & mask) {
756 if (scope->closed) {
757 if (conflict) {
758 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
759 } else {
760 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
761 }
762 }
763
764 conflict = true;
765 }
766
767 if (scope->closed) break;
768 scope = scope->previous;
769 }
770
771 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
772}
773
774static void
775pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
776 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
777 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
778 // Pass.
779 break;
780 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
781 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
782 break;
783 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
784 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
785 break;
786 }
787}
788
789static void
790pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
791 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
792 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
793 // Pass.
794 break;
795 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
796 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
797 break;
798 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
799 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800 break;
801 }
802}
803
804static void
805pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
806 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
807 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
808 // Pass.
809 break;
810 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
811 // This shouldn't happen, because ... is not allowed in the
812 // declaration of blocks. If we get here, we assume we already have
813 // an error for this.
814 break;
815 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
816 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
817 break;
818 }
819}
820
821static void
822pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
823 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
824 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
825 // Pass.
826 break;
827 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
828 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
829 break;
830 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
831 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832 break;
833 }
834}
835
839static PRISM_INLINE pm_shareable_constant_value_t
840pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
841 return parser->current_scope->shareable_constant;
842}
843
848static void
849pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
850 pm_scope_t *scope = parser->current_scope;
851
852 do {
853 scope->shareable_constant = shareable_constant;
854 } while (!scope->closed && (scope = scope->previous) != NULL);
855}
856
857/******************************************************************************/
858/* Local variable-related functions */
859/******************************************************************************/
860
/**
 * A locals set whose capacity is below this value is stored as a flat list;
 * at or above it, the set is stored as an open-addressed hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 5
865
866static void
867pm_locals_free(pm_locals_t *locals) {
868 if (locals->capacity > 0) {
869 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
870 }
871}
872
877static uint32_t
878pm_locals_hash(pm_constant_id_t name) {
879 name = ((name >> 16) ^ name) * 0x45d9f3b;
880 name = ((name >> 16) ^ name) * 0x45d9f3b;
881 name = (name >> 16) ^ name;
882 return name;
883}
884
/**
 * Grow the storage of the given locals set: an empty set gets a capacity of
 * 4, and any other set has its capacity doubled. Existing entries are moved
 * into the new storage and the old storage is freed. Aborts the process if
 * the allocation fails.
 */
static void
pm_locals_resize(pm_locals_t *locals) {
    uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
    // Guards against the doubling above wrapping around.
    assert(next_capacity > locals->capacity);

    // xcalloc zero-fills, so every slot in the new storage starts out with a
    // zeroed name.
    pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
    if (next_locals == NULL) abort();

    if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
        // Still a flat list: entries keep their positions.
        if (locals->size > 0) {
            memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
        }
    } else {
        // If we just switched from a list to a hash, then we need to fill in
        // the hash values of all of the locals.
        bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
        // The mask relies on the capacity being a power of two, which holds
        // because capacities start at 4 and only ever double.
        uint32_t mask = next_capacity - 1;

        for (uint32_t index = 0; index < locals->capacity; index++) {
            pm_local_t *local = &locals->locals[index];

            if (local->name != PM_CONSTANT_ID_UNSET) {
                if (hash_needed) local->hash = pm_locals_hash(local->name);

                // Linear probing: advance until an unused slot is found.
                uint32_t hash = local->hash;
                while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
                next_locals[hash & mask] = *local;
            }
        }
    }

    // Free the old storage using the old capacity before overwriting it.
    pm_locals_free(locals);
    locals->locals = next_locals;
    locals->capacity = next_capacity;
}
924
940static bool
941pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
942 if (locals->size >= (locals->capacity / 4 * 3)) {
943 pm_locals_resize(locals);
944 }
945
946 locals->bloom |= (1u << (name & 31));
947
948 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
949 for (uint32_t index = 0; index < locals->capacity; index++) {
950 pm_local_t *local = &locals->locals[index];
951
952 if (local->name == PM_CONSTANT_ID_UNSET) {
953 *local = (pm_local_t) {
954 .name = name,
955 .location = { .start = start, .length = length },
956 .index = locals->size++,
957 .reads = reads,
958 .hash = 0
959 };
960 return true;
961 } else if (local->name == name) {
962 return false;
963 }
964 }
965 } else {
966 uint32_t mask = locals->capacity - 1;
967 uint32_t hash = pm_locals_hash(name);
968 uint32_t initial_hash = hash;
969
970 do {
971 pm_local_t *local = &locals->locals[hash & mask];
972
973 if (local->name == PM_CONSTANT_ID_UNSET) {
974 *local = (pm_local_t) {
975 .name = name,
976 .location = { .start = start, .length = length },
977 .index = locals->size++,
978 .reads = reads,
979 .hash = initial_hash
980 };
981 return true;
982 } else if (local->name == name) {
983 return false;
984 } else {
985 hash++;
986 }
987 } while ((hash & mask) != initial_hash);
988 }
989
990 assert(false && "unreachable");
991 return true;
992}
993
998static uint32_t
999pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
1000 if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
1001
1002 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
1003 for (uint32_t index = 0; index < locals->size; index++) {
1004 pm_local_t *local = &locals->locals[index];
1005 if (local->name == name) return index;
1006 }
1007 } else {
1008 uint32_t mask = locals->capacity - 1;
1009 uint32_t hash = pm_locals_hash(name);
1010 uint32_t initial_hash = hash & mask;
1011
1012 do {
1013 pm_local_t *local = &locals->locals[hash & mask];
1014
1015 if (local->name == PM_CONSTANT_ID_UNSET) {
1016 return UINT32_MAX;
1017 } else if (local->name == name) {
1018 return hash & mask;
1019 } else {
1020 hash++;
1021 }
1022 } while ((hash & mask) != initial_hash);
1023 }
1024
1025 return UINT32_MAX;
1026}
1027
1032static void
1033pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
1034 uint32_t index = pm_locals_find(locals, name);
1035 assert(index != UINT32_MAX);
1036
1037 pm_local_t *local = &locals->locals[index];
1038 assert(local->reads < UINT32_MAX);
1039
1040 local->reads++;
1041}
1042
1047static void
1048pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
1049 uint32_t index = pm_locals_find(locals, name);
1050 assert(index != UINT32_MAX);
1051
1052 pm_local_t *local = &locals->locals[index];
1053 assert(local->reads > 0);
1054
1055 local->reads--;
1056}
1057
1061static uint32_t
1062pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
1063 uint32_t index = pm_locals_find(locals, name);
1064 assert(index != UINT32_MAX);
1065
1066 return locals->locals[index].reads;
1067}
1068
1077static void
1078pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
1079 pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
1080
1081 // If we're still below the threshold for switching to a hash, then we only
1082 // need to loop over the locals until we hit the size because the locals are
1083 // stored in a list.
1084 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
1085
1086 // We will only warn for unused variables if we're not at the top level, or
1087 // if we're parsing a file outside of eval or -e.
1088 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
1089
1090 for (uint32_t index = 0; index < capacity; index++) {
1091 pm_local_t *local = &locals->locals[index];
1092
1093 if (local->name != PM_CONSTANT_ID_UNSET) {
1094 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1095
1096 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1097 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1098
1099 if (constant->length >= 1 && *constant->start != '_') {
1100 PM_PARSER_WARN_FORMAT(
1101 parser,
1102 local->location.start,
1103 local->location.length,
1104 PM_WARN_UNUSED_LOCAL_VARIABLE,
1105 (int) constant->length,
1106 (const char *) constant->start
1107 );
1108 }
1109 }
1110 }
1111 }
1112}
1113
1114/******************************************************************************/
1115/* Node-related functions */
1116/******************************************************************************/
1117
1122pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1123 /* Fast path: if this is the same token as the last lookup (same pointer
1124 * range), return the cached result. */
1125 if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
1126 return parser->constant_cache.id;
1127 }
1128
1129 pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
1130
1131 parser->constant_cache.start = start;
1132 parser->constant_cache.end = end;
1133 parser->constant_cache.id = id;
1134
1135 return id;
1136}
1137
1142pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1143 return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
1144}
1145
1150pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1151 return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
1152}
1153
1158pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1159 return pm_parser_constant_id_raw(parser, token->start, token->end);
1160}
1161
/**
 * The list of node types treated as void value expressions, written so that
 * it can be dropped into a switch as `case PM_CASE_VOID_VALUE:`. The leading
 * `case` keyword and the trailing colon are supplied at the use site.
 */
#define PM_CASE_VOID_VALUE \
    PM_RETURN_NODE: \
    case PM_BREAK_NODE: \
    case PM_NEXT_NODE: \
    case PM_REDO_NODE: \
    case PM_RETRY_NODE: \
    case PM_MATCH_REQUIRED_NODE
1168
1174static pm_node_t *
1175pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1176 pm_node_t *void_node = NULL;
1177
1178 while (node != NULL) {
1179 switch (PM_NODE_TYPE(node)) {
1180 case PM_CASE_VOID_VALUE:
1181 return void_node != NULL ? void_node : node;
1182 case PM_MATCH_PREDICATE_NODE:
1183 return NULL;
1184 case PM_BEGIN_NODE: {
1185 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
1187 if (cast->ensure_clause != NULL) {
1188 if (cast->rescue_clause != NULL) {
1189 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1190 if (vn != NULL) return vn;
1191 }
1192
1193 if (cast->statements != NULL) {
1194 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1195 if (vn != NULL) return vn;
1196 }
1197
1198 node = UP(cast->ensure_clause);
1199 } else if (cast->rescue_clause != NULL) {
1200 // https://bugs.ruby-lang.org/issues/21669
1201 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1202 if (cast->statements == NULL) return NULL;
1203
1204 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1205 if (vn == NULL) return NULL;
1206 if (void_node == NULL) void_node = vn;
1207 }
1208
1209 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1210 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1211
1212 if (vn == NULL) {
1213 // https://bugs.ruby-lang.org/issues/21669
1214 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1215 return NULL;
1216 }
1217 void_node = NULL;
1218 break;
1219 }
1220 }
1221
1222 if (cast->else_clause != NULL) {
1223 node = UP(cast->else_clause);
1224
1225 // https://bugs.ruby-lang.org/issues/21669
1226 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1227 pm_node_t *vn = pm_check_value_expression(parser, node);
1228 if (vn != NULL) return vn;
1229 }
1230 } else {
1231 return void_node;
1232 }
1233 } else {
1234 node = UP(cast->statements);
1235 }
1236
1237 break;
1238 }
1239 case PM_CASE_NODE: {
1240 // https://bugs.ruby-lang.org/issues/21669
1241 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1242 return NULL;
1243 }
1244
1245 pm_case_node_t *cast = (pm_case_node_t *) node;
1246 if (cast->else_clause == NULL) return NULL;
1247
1248 pm_node_t *condition;
1249 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1250 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1251
1252 pm_when_node_t *cast = (pm_when_node_t *) condition;
1253 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1254 if (vn == NULL) return NULL;
1255 if (void_node == NULL) void_node = vn;
1256 }
1257
1258 node = UP(cast->else_clause);
1259 break;
1260 }
1261 case PM_CASE_MATCH_NODE: {
1262 // https://bugs.ruby-lang.org/issues/21669
1263 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1264 return NULL;
1265 }
1266
1268 if (cast->else_clause == NULL) return NULL;
1269
1270 pm_node_t *condition;
1271 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1272 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1273
1274 pm_in_node_t *cast = (pm_in_node_t *) condition;
1275 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1276 if (vn == NULL) return NULL;
1277 if (void_node == NULL) void_node = vn;
1278 }
1279
1280 node = UP(cast->else_clause);
1281 break;
1282 }
1283 case PM_ENSURE_NODE: {
1284 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1285 node = UP(cast->statements);
1286 break;
1287 }
1288 case PM_PARENTHESES_NODE: {
1290 node = UP(cast->body);
1291 break;
1292 }
1293 case PM_STATEMENTS_NODE: {
1295
1296 // https://bugs.ruby-lang.org/issues/21669
1297 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1298 pm_node_t *body_part;
1299 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1300 switch (PM_NODE_TYPE(body_part)) {
1301 case PM_CASE_VOID_VALUE:
1302 if (void_node == NULL) {
1303 void_node = body_part;
1304 }
1305 return void_node;
1306 default: break;
1307 }
1308 }
1309 }
1310
1311 node = cast->body.nodes[cast->body.size - 1];
1312 break;
1313 }
1314 case PM_IF_NODE: {
1315 pm_if_node_t *cast = (pm_if_node_t *) node;
1316 if (cast->statements == NULL || cast->subsequent == NULL) {
1317 return NULL;
1318 }
1319 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1320 if (vn == NULL) {
1321 return NULL;
1322 }
1323 if (void_node == NULL) {
1324 void_node = vn;
1325 }
1326 node = cast->subsequent;
1327 break;
1328 }
1329 case PM_UNLESS_NODE: {
1330 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1331 if (cast->statements == NULL || cast->else_clause == NULL) {
1332 return NULL;
1333 }
1334 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1335 if (vn == NULL) {
1336 return NULL;
1337 }
1338 if (void_node == NULL) {
1339 void_node = vn;
1340 }
1341 node = UP(cast->else_clause);
1342 break;
1343 }
1344 case PM_ELSE_NODE: {
1345 pm_else_node_t *cast = (pm_else_node_t *) node;
1346 node = UP(cast->statements);
1347 break;
1348 }
1349 case PM_AND_NODE: {
1350 pm_and_node_t *cast = (pm_and_node_t *) node;
1351 node = cast->left;
1352 break;
1353 }
1354 case PM_OR_NODE: {
1355 pm_or_node_t *cast = (pm_or_node_t *) node;
1356 node = cast->left;
1357 break;
1358 }
1359 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1361
1362 pm_scope_t *scope = parser->current_scope;
1363 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1364
1365 pm_locals_read(&scope->locals, cast->name);
1366 return NULL;
1367 }
1368 default:
1369 return NULL;
1370 }
1371 }
1372
1373 return NULL;
1374}
1375
1376static PRISM_INLINE void
1377pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1378 pm_node_t *void_node = pm_check_value_expression(parser, node);
1379 if (void_node != NULL) {
1380 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1381 }
1382}
1383
1387static void
1388pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1389 const char *type = NULL;
1390 int length = 0;
1391
1392 switch (PM_NODE_TYPE(node)) {
1393 case PM_BACK_REFERENCE_READ_NODE:
1394 case PM_CLASS_VARIABLE_READ_NODE:
1395 case PM_GLOBAL_VARIABLE_READ_NODE:
1396 case PM_INSTANCE_VARIABLE_READ_NODE:
1397 case PM_LOCAL_VARIABLE_READ_NODE:
1398 case PM_NUMBERED_REFERENCE_READ_NODE:
1399 type = "a variable";
1400 length = 10;
1401 break;
1402 case PM_CALL_NODE: {
1403 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1404 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1405
1406 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1407 switch (message->length) {
1408 case 1:
1409 switch (message->start[0]) {
1410 case '+':
1411 case '-':
1412 case '*':
1413 case '/':
1414 case '%':
1415 case '|':
1416 case '^':
1417 case '&':
1418 case '>':
1419 case '<':
1420 type = (const char *) message->start;
1421 length = 1;
1422 break;
1423 }
1424 break;
1425 case 2:
1426 switch (message->start[1]) {
1427 case '=':
1428 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1429 type = (const char *) message->start;
1430 length = 2;
1431 }
1432 break;
1433 case '@':
1434 if (message->start[0] == '+' || message->start[0] == '-') {
1435 type = (const char *) message->start;
1436 length = 2;
1437 }
1438 break;
1439 case '*':
1440 if (message->start[0] == '*') {
1441 type = (const char *) message->start;
1442 length = 2;
1443 }
1444 break;
1445 }
1446 break;
1447 case 3:
1448 if (memcmp(message->start, "<=>", 3) == 0) {
1449 type = "<=>";
1450 length = 3;
1451 }
1452 break;
1453 }
1454
1455 break;
1456 }
1457 case PM_CONSTANT_PATH_NODE:
1458 type = "::";
1459 length = 2;
1460 break;
1461 case PM_CONSTANT_READ_NODE:
1462 type = "a constant";
1463 length = 10;
1464 break;
1465 case PM_DEFINED_NODE:
1466 type = "defined?";
1467 length = 8;
1468 break;
1469 case PM_FALSE_NODE:
1470 type = "false";
1471 length = 5;
1472 break;
1473 case PM_FLOAT_NODE:
1474 case PM_IMAGINARY_NODE:
1475 case PM_INTEGER_NODE:
1476 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1477 case PM_INTERPOLATED_STRING_NODE:
1478 case PM_RATIONAL_NODE:
1479 case PM_REGULAR_EXPRESSION_NODE:
1480 case PM_SOURCE_ENCODING_NODE:
1481 case PM_SOURCE_FILE_NODE:
1482 case PM_SOURCE_LINE_NODE:
1483 case PM_STRING_NODE:
1484 case PM_SYMBOL_NODE:
1485 type = "a literal";
1486 length = 9;
1487 break;
1488 case PM_NIL_NODE:
1489 type = "nil";
1490 length = 3;
1491 break;
1492 case PM_RANGE_NODE: {
1493 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1494
1495 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1496 type = "...";
1497 length = 3;
1498 } else {
1499 type = "..";
1500 length = 2;
1501 }
1502
1503 break;
1504 }
1505 case PM_SELF_NODE:
1506 type = "self";
1507 length = 4;
1508 break;
1509 case PM_TRUE_NODE:
1510 type = "true";
1511 length = 4;
1512 break;
1513 default:
1514 break;
1515 }
1516
1517 if (type != NULL) {
1518 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1519 }
1520}
1521
1526static void
1527pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1528 assert(node->body.size > 0);
1529 const size_t size = node->body.size - (last_value ? 1 : 0);
1530 for (size_t index = 0; index < size; index++) {
1531 pm_void_statement_check(parser, node->body.nodes[index]);
1532 }
1533}
1534
/**
 * The context in which a predicate is being checked. It selects the wording
 * used when warning about a literal in that predicate.
 */
typedef enum {
    /** Literal warnings describe the predicate as a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** Literal warnings describe the predicate as a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1545
1549static void
1550pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1551 switch (type) {
1552 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1553 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1554 break;
1555 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1556 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1557 break;
1558 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1559 break;
1560 }
1561}
1562
1567static bool
1568pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1569 switch (PM_NODE_TYPE(node)) {
1570 case PM_ARRAY_NODE: {
1571 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1572
1573 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1574 for (size_t index = 0; index < cast->elements.size; index++) {
1575 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1576 }
1577
1578 return true;
1579 }
1580 case PM_HASH_NODE: {
1581 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1582
1583 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1584 for (size_t index = 0; index < cast->elements.size; index++) {
1585 const pm_node_t *element = cast->elements.nodes[index];
1586 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1587
1588 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1589 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1590 }
1591
1592 return true;
1593 }
1594 case PM_FALSE_NODE:
1595 case PM_FLOAT_NODE:
1596 case PM_IMAGINARY_NODE:
1597 case PM_INTEGER_NODE:
1598 case PM_NIL_NODE:
1599 case PM_RATIONAL_NODE:
1600 case PM_REGULAR_EXPRESSION_NODE:
1601 case PM_SOURCE_ENCODING_NODE:
1602 case PM_SOURCE_FILE_NODE:
1603 case PM_SOURCE_LINE_NODE:
1604 case PM_STRING_NODE:
1605 case PM_SYMBOL_NODE:
1606 case PM_TRUE_NODE:
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
1617static PRISM_INLINE void
1618pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1619 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1620 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1621 }
1622}
1623
1636static void
1637pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1638 switch (PM_NODE_TYPE(node)) {
1639 case PM_AND_NODE: {
1640 pm_and_node_t *cast = (pm_and_node_t *) node;
1641 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1642 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1643 break;
1644 }
1645 case PM_OR_NODE: {
1646 pm_or_node_t *cast = (pm_or_node_t *) node;
1647 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1648 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1649 break;
1650 }
1651 case PM_PARENTHESES_NODE: {
1653
1654 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1655 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1656 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1657 }
1658
1659 break;
1660 }
1661 case PM_BEGIN_NODE: {
1662 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1663 if (cast->statements != NULL) {
1664 pm_statements_node_t *statements = cast->statements;
1665 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1666 }
1667 break;
1668 }
1669 case PM_RANGE_NODE: {
1670 pm_range_node_t *cast = (pm_range_node_t *) node;
1671
1672 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1673 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1674
1675 // Here we change the range node into a flip flop node. We can do
1676 // this since the nodes are exactly the same except for the type.
1677 // We're only asserting against the size when we should probably
1678 // assert against the entire layout, but we'll assume tests will
1679 // catch this.
1680 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1681 node->type = PM_FLIP_FLOP_NODE;
1682
1683 break;
1684 }
1685 case PM_REGULAR_EXPRESSION_NODE:
1686 // Here we change the regular expression node into a match last line
1687 // node. We can do this since the nodes are exactly the same except
1688 // for the type.
1690 node->type = PM_MATCH_LAST_LINE_NODE;
1691
1692 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1693 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1694 }
1695
1696 break;
1697 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1698 // Here we change the interpolated regular expression node into an
1699 // interpolated match last line node. We can do this since the nodes
1700 // are exactly the same except for the type.
1702 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1703
1704 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1705 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1706 }
1707
1708 break;
1709 case PM_INTEGER_NODE:
1710 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1711 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1712 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1713 }
1714 } else {
1715 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1716 }
1717 break;
1718 case PM_STRING_NODE:
1719 case PM_SOURCE_FILE_NODE:
1720 case PM_INTERPOLATED_STRING_NODE:
1721 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1722 break;
1723 case PM_SYMBOL_NODE:
1724 case PM_INTERPOLATED_SYMBOL_NODE:
1725 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1726 break;
1727 case PM_SOURCE_LINE_NODE:
1728 case PM_SOURCE_ENCODING_NODE:
1729 case PM_FLOAT_NODE:
1730 case PM_RATIONAL_NODE:
1731 case PM_IMAGINARY_NODE:
1732 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1733 break;
1734 case PM_CLASS_VARIABLE_WRITE_NODE:
1735 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1736 break;
1737 case PM_CONSTANT_WRITE_NODE:
1738 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1739 break;
1740 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1741 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1742 break;
1743 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1744 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1745 break;
1746 case PM_LOCAL_VARIABLE_WRITE_NODE:
1747 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1748 break;
1749 case PM_MULTI_WRITE_NODE:
1750 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1751 break;
1752 default:
1753 break;
1754 }
1755}
1756
1779
1783static PRISM_INLINE const pm_location_t *
1784pm_arguments_end(pm_arguments_t *arguments) {
1785 if (arguments->block != NULL) {
1786 uint32_t end = PM_NODE_END(arguments->block);
1787
1788 if (arguments->closing_loc.length > 0) {
1789 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1790 if (arguments_end > end) {
1791 return &arguments->closing_loc;
1792 }
1793 }
1794 return &arguments->block->location;
1795 }
1796 if (arguments->closing_loc.length > 0) {
1797 return &arguments->closing_loc;
1798 }
1799 if (arguments->arguments != NULL) {
1800 return &arguments->arguments->base.location;
1801 }
1802 if (arguments->opening_loc.length > 0) {
1803 return &arguments->opening_loc;
1804 }
1805 return NULL;
1806}
1807
1812static void
1813pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1814 // First, check that we have arguments and that we don't have a closing
1815 // location for them.
1816 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1817 return;
1818 }
1819
1820 // Next, check that we don't have a single parentheses argument. This would
1821 // look like:
1822 //
1823 // foo (1) {}
1824 //
1825 // In this case, it's actually okay for the block to be attached to the
1826 // call, even though it looks like it's attached to the argument.
1827 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1828 return;
1829 }
1830
1831 // If we didn't hit a case before this check, then at this point we need to
1832 // add a syntax error.
1833 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1834}
1835
1836/******************************************************************************/
1837/* Basic character checks */
1838/******************************************************************************/
1839
1846static PRISM_INLINE size_t
1847char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1848 if (n <= 0) return 0;
1849
1850 if (parser->encoding_changed) {
1851 size_t width;
1852
1853 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1854 return width;
1855 } else if (*b == '_') {
1856 return 1;
1857 } else if (*b >= 0x80) {
1858 return parser->encoding->char_width(b, n);
1859 } else {
1860 return 0;
1861 }
1862 } else if (*b < 0x80) {
1863 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1864 } else {
1865 return pm_encoding_utf_8_char_width(b, n);
1866 }
1867}
1868
1873static PRISM_INLINE size_t
1874char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1875 if (n <= 0) {
1876 return 0;
1877 } else if (*b < 0x80) {
1878 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1879 } else {
1880 return pm_encoding_utf_8_char_width(b, n);
1881 }
1882}
1883
/**
 * scan_identifier_ascii(start, end) returns the number of leading bytes in
 * [start, end) that were consumed as ASCII identifier characters
 * ([a-zA-Z0-9_]) by the wide scanning loop.
 *
 * Input is only consumed in whole chunks (16 bytes for NEON and SSSE3, 8 bytes
 * for SWAR), so any tail shorter than a chunk is left unscanned and the result
 * may be smaller than the true identifier length. The fallback always returns
 * 0.
 *
 * The loop bounds are written as `end - cursor >= N` rather than
 * `cursor + N <= end`, because the latter forms a pointer more than one past
 * the end of the buffer when fewer than N bytes remain.
 */
#if defined(PRISM_HAS_NEON)
#include <arm_neon.h>

static PRISM_INLINE size_t
scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
    const uint8_t *cursor = start;

    // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
    // Each high nibble is assigned a unique bit; the low nibble table
    // contains the OR of bits for all high nibbles that have an
    // identifier character at that low nibble position. A byte is an
    // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
    static const uint8_t low_lut_data[16] = {
        0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
        0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
    };
    static const uint8_t high_lut_data[16] = {
        0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    };
    const uint8x16_t low_lut = vld1q_u8(low_lut_data);
    const uint8x16_t high_lut = vld1q_u8(high_lut_data);
    const uint8x16_t mask_0f = vdupq_n_u8(0x0F);

    while (end - cursor >= 16) {
        uint8x16_t v = vld1q_u8(cursor);

        uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
        uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
        uint8x16_t ident = vandq_u8(lo_class, hi_class);

        // Fast check: if the per-byte minimum is nonzero, every byte matched.
        if (vminvq_u8(ident) != 0) {
            cursor += 16;
            continue;
        }

        // Find the first non-identifier byte (zero in ident).
        uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
        uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);

        if (lo != 0) {
            cursor += pm_ctzll(lo) / 8;
        } else {
            uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
            cursor += 8 + pm_ctzll(hi) / 8;
        }

        return (size_t) (cursor - start);
    }

    return (size_t) (cursor - start);
}

#elif defined(PRISM_HAS_SSSE3)
#include <tmmintrin.h>

static PRISM_INLINE size_t
scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
    const uint8_t *cursor = start;

    while (end - cursor >= 16) {
        __m128i v = _mm_loadu_si128((const __m128i *) cursor);
        __m128i zero = _mm_setzero_si128();

        // Unsigned range check via saturating subtraction:
        //   byte >= lo  <=>  saturate(lo - byte) == 0
        //   byte <= hi  <=>  saturate(byte - hi) == 0

        // Fold case: OR with 0x20 maps A-Z to a-z.
        __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
        __m128i letter = _mm_and_si128(
            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
            _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));

        __m128i digit = _mm_and_si128(
            _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
            _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));

        __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));

        __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
        int mask = _mm_movemask_epi8(ident);

        if (mask == 0xFFFF) {
            cursor += 16;
            continue;
        }

        // The lowest clear bit of the mask is the first non-identifier byte.
        cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
        return (size_t) (cursor - start);
    }

    return (size_t) (cursor - start);
}

// The SWAR path uses pm_ctzll to find the first non-matching byte within a
// word, which only yields the correct byte index on little-endian targets.
// We gate on a positive little-endian check so that unknown-endianness
// platforms safely fall through to the no-op fallback.
#elif defined(PRISM_HAS_SWAR)

static PRISM_INLINE size_t
scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
    static const uint64_t ones = 0x0101010101010101ULL;
    static const uint64_t highs = 0x8080808080808080ULL;
    const uint8_t *cursor = start;

    while (end - cursor >= 8) {
        uint64_t word;
        memcpy(&word, cursor, 8);

        // Bail on any non-ASCII byte.
        if (word & highs) break;

        uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;

        // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
        // then check the lowercase range once. A-Z maps to a-z; the
        // only non-letter byte that could alias into [0x61,0x7A] is one
        // whose original value was in [0x41,0x5A] — which is exactly
        // the uppercase letters we want to match.
        uint64_t lowered = word | (ones * 0x20);
        uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;

        // Standard SWAR "has zero byte" idiom on (word XOR 0x5F) to find
        // bytes equal to underscore. Safe from cross-byte borrows because
        // the ASCII guard above ensures all bytes are < 0x80.
        uint64_t xor_us = word ^ (ones * 0x5F);
        uint64_t underscore = (xor_us - ones) & ~xor_us & highs;

        uint64_t ident = digit | letter | underscore;

        if (ident == highs) {
            cursor += 8;
            continue;
        }

        // Find the first non-identifier byte. On little-endian the first
        // byte sits in the least-significant position.
        uint64_t not_ident = ~ident & highs;
        cursor += pm_ctzll(not_ident) / 8;
        return (size_t) (cursor - start);
    }

    return (size_t) (cursor - start);
}

#else

// No-op fallback for big-endian or other unsupported platforms.
// The caller's byte-at-a-time loop handles everything.
#define scan_identifier_ascii(start, end) ((size_t) 0)

#endif
2061
2067static PRISM_INLINE size_t
2068char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
2069 if (n <= 0) {
2070 return 0;
2071 } else if (parser->encoding_changed) {
2072 size_t width;
2073
2074 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
2075 return width;
2076 } else if (*b == '_') {
2077 return 1;
2078 } else if (*b >= 0x80) {
2079 return parser->encoding->char_width(b, n);
2080 } else {
2081 return 0;
2082 }
2083 } else {
2084 return char_is_identifier_utf8(b, n);
2085 }
2086}
2087
2088// Here we're defining a perfect hash for the characters that are allowed in
2089// global names. This is used to quickly check the next character after a $ to
2090// see if it's a valid character for a global name.
2091#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
2092#define PUNCT(idx) ( \
2093 BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
2094 BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
2095 BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
2096 BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
2097 BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
2098 BIT('0', idx))
2099
2100const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
2101
2102#undef BIT
2103#undef PUNCT
2104
2105static PRISM_INLINE bool
2106char_is_global_name_punctuation(const uint8_t b) {
2107 const unsigned int i = (const unsigned int) b;
2108 if (i <= 0x20 || 0x7e < i) return false;
2109
2110 return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
2111}
2112
2113static PRISM_INLINE bool
2114token_is_setter_name(pm_token_t *token) {
2115 return (
2116 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
2117 ((token->type == PM_TOKEN_IDENTIFIER) &&
2118 (token->end - token->start >= 2) &&
2119 (token->end[-1] == '='))
2120 );
2121}
2122
/**
 * Return true if the `length` bytes at `source` spell one of the keywords
 * listed below. The source does not need to be NUL-terminated; only the first
 * `length` bytes are compared.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    // Keywords grouped by length, each list terminated by NULL.
    static const char *const keywords_2[] = { "do", "if", "in", "or", NULL };
    static const char *const keywords_3[] = { "and", "def", "end", "for", "nil", "not", NULL };
    static const char *const keywords_4[] = {
        "case", "else", "next", "redo", "self", "then", "true", "when", NULL
    };
    static const char *const keywords_5[] = {
        "alias", "begin", "break", "class", "elsif", "false", "retry",
        "super", "undef", "until", "while", "yield", NULL
    };
    static const char *const keywords_6[] = {
        "ensure", "module", "rescue", "return", "unless", NULL
    };
    static const char *const keywords_8[] = { "__LINE__", "__FILE__", NULL };
    static const char *const keywords_12[] = { "__ENCODING__", NULL };

    const char *const *candidates;

    switch (length) {
        case 2: candidates = keywords_2; break;
        case 3: candidates = keywords_3; break;
        case 4: candidates = keywords_4; break;
        case 5: candidates = keywords_5; break;
        case 6: candidates = keywords_6; break;
        case 8: candidates = keywords_8; break;
        case 12: candidates = keywords_12; break;
        default: return false;
    }

    // Every candidate is exactly `length` bytes long, so a plain memcmp is a
    // full comparison.
    for (; *candidates != NULL; candidates++) {
        if (memcmp(source, *candidates, length) == 0) return true;
    }

    return false;
}
2193
2194/******************************************************************************/
2195/* Node flag handling functions */
2196/******************************************************************************/
2197
2201static PRISM_INLINE void
2202pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
2203 node->flags |= flag;
2204}
2205
2209static PRISM_INLINE void
2210pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
2211 node->flags &= (pm_node_flags_t) ~flag;
2212}
2213
2217static PRISM_INLINE void
2218pm_node_flag_set_repeated_parameter(pm_node_t *node) {
2219 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
2220 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
2221 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
2222 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
2223 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
2224 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
2225 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
2226 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
2227
2228 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
2229}
2230
2231/******************************************************************************/
2232/* Node creation functions */
2233/******************************************************************************/
2234
2240#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
2241
2245static PRISM_INLINE pm_node_flags_t
2246pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
2247 pm_node_flags_t flags = 0;
2248
2249 if (closing->type == PM_TOKEN_REGEXP_END) {
2250 pm_buffer_t unknown_flags = { 0 };
2251
2252 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
2253 switch (*flag) {
2254 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
2255 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
2256 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
2257 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
2258
2259 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
2260 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
2261 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
2262 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
2263
2264 default: pm_buffer_append_byte(&unknown_flags, *flag);
2265 }
2266 }
2267
2268 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
2269 if (unknown_flags_length != 0) {
2270 const char *word = unknown_flags_length >= 2 ? "options" : "option";
2271 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
2272 }
2273 pm_buffer_cleanup(&unknown_flags);
2274 }
2275
2276 return flags;
2277}
2278
2279#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
2280
2281static pm_statements_node_t *
2282pm_statements_node_create(pm_parser_t *parser);
2283
2284static void
2285pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2286
2287static size_t
2288pm_statements_node_body_length(pm_statements_node_t *node);
2289
2294static PRISM_INLINE void
2295pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2296 if (integer->values != NULL) {
2297 size_t byte_size = integer->length * sizeof(uint32_t);
2298 uint32_t *old_values = integer->values;
2299 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
2300 xfree(old_values);
2301 }
2302}
2303
2307static pm_error_recovery_node_t *
2308pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2309 return pm_error_recovery_node_new(
2310 parser->arena,
2311 ++parser->node_id,
2312 0,
2313 ((pm_location_t) { .start = start, .length = length }),
2314 NULL
2315 );
2316}
2317
2321static pm_error_recovery_node_t *
2322pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
2323 return pm_error_recovery_node_new(
2324 parser->arena,
2325 ++parser->node_id,
2326 0,
2327 unexpected->location,
2328 unexpected
2329 );
2330}
2331
2335static pm_alias_global_variable_node_t *
2336pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2337 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2338
2339 return pm_alias_global_variable_node_new(
2340 parser->arena,
2341 ++parser->node_id,
2342 0,
2343 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2344 new_name,
2345 old_name,
2346 TOK2LOC(parser, keyword)
2347 );
2348}
2349
2353static pm_alias_method_node_t *
2354pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2355 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2356
2357 return pm_alias_method_node_new(
2358 parser->arena,
2359 ++parser->node_id,
2360 0,
2361 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2362 new_name,
2363 old_name,
2364 TOK2LOC(parser, keyword)
2365 );
2366}
2367
2371static pm_alternation_pattern_node_t *
2372pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2373 return pm_alternation_pattern_node_new(
2374 parser->arena,
2375 ++parser->node_id,
2376 0,
2377 PM_LOCATION_INIT_NODES(left, right),
2378 left,
2379 right,
2380 TOK2LOC(parser, operator)
2381 );
2382}
2383
2387static pm_and_node_t *
2388pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2389 pm_assert_value_expression(parser, left);
2390
2391 return pm_and_node_new(
2392 parser->arena,
2393 ++parser->node_id,
2394 0,
2395 PM_LOCATION_INIT_NODES(left, right),
2396 left,
2397 right,
2398 TOK2LOC(parser, operator)
2399 );
2400}
2401
2405static pm_arguments_node_t *
2406pm_arguments_node_create(pm_parser_t *parser) {
2407 return pm_arguments_node_new(
2408 parser->arena,
2409 ++parser->node_id,
2410 0,
2411 PM_LOCATION_INIT_UNSET,
2412 ((pm_node_list_t) { 0 })
2413 );
2414}
2415
2419static size_t
2420pm_arguments_node_size(pm_arguments_node_t *node) {
2421 return node->arguments.size;
2422}
2423
2427static void
2428pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
2429 if (pm_arguments_node_size(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, argument);
2431 }
2432
2433 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2434 PM_NODE_LENGTH_SET_NODE(node, argument);
2435 }
2436
2437 pm_node_list_append(arena, &node->arguments, argument);
2438
2439 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2440 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2441 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2442 } else {
2443 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2444 }
2445 }
2446}
2447
2451static pm_array_node_t *
2452pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2453 if (opening == NULL) {
2454 return pm_array_node_new(
2455 parser->arena,
2456 ++parser->node_id,
2457 PM_NODE_FLAG_STATIC_LITERAL,
2458 PM_LOCATION_INIT_UNSET,
2459 ((pm_node_list_t) { 0 }),
2460 ((pm_location_t) { 0 }),
2461 ((pm_location_t) { 0 })
2462 );
2463 } else {
2464 return pm_array_node_new(
2465 parser->arena,
2466 ++parser->node_id,
2467 PM_NODE_FLAG_STATIC_LITERAL,
2468 PM_LOCATION_INIT_TOKEN(parser, opening),
2469 ((pm_node_list_t) { 0 }),
2470 TOK2LOC(parser, opening),
2471 TOK2LOC(parser, opening)
2472 );
2473 }
2474}
2475
2479static PRISM_INLINE void
2480pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
2481 if (!node->elements.size && !node->opening_loc.length) {
2482 PM_NODE_START_SET_NODE(node, element);
2483 }
2484
2485 pm_node_list_append(arena, &node->elements, element);
2486 PM_NODE_LENGTH_SET_NODE(node, element);
2487
2488 // If the element is not a static literal, then the array is not a static
2489 // literal. Turn that flag off.
2490 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2491 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2492 }
2493
2494 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2495 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2496 }
2497}
2498
2502static void
2503pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2504 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2505 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2506 node->closing_loc = TOK2LOC(parser, closing);
2507}
2508
2513static pm_array_pattern_node_t *
2514pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2515 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2516 parser->arena,
2517 ++parser->node_id,
2518 0,
2519 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2520 NULL,
2521 ((pm_node_list_t) { 0 }),
2522 NULL,
2523 ((pm_node_list_t) { 0 }),
2524 ((pm_location_t) { 0 }),
2525 ((pm_location_t) { 0 })
2526 );
2527
2528 // For now we're going to just copy over each pointer manually. This could be
2529 // much more efficient, as we could instead resize the node list.
2530 bool found_rest = false;
2531 pm_node_t *child;
2532
2533 PM_NODE_LIST_FOREACH(nodes, index, child) {
2534 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2535 node->rest = child;
2536 found_rest = true;
2537 } else if (found_rest) {
2538 pm_node_list_append(parser->arena, &node->posts, child);
2539 } else {
2540 pm_node_list_append(parser->arena, &node->requireds, child);
2541 }
2542 }
2543
2544 return node;
2545}
2546
2550static pm_array_pattern_node_t *
2551pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2552 return pm_array_pattern_node_new(
2553 parser->arena,
2554 ++parser->node_id,
2555 0,
2556 PM_LOCATION_INIT_NODE(rest),
2557 NULL,
2558 ((pm_node_list_t) { 0 }),
2559 rest,
2560 ((pm_node_list_t) { 0 }),
2561 ((pm_location_t) { 0 }),
2562 ((pm_location_t) { 0 })
2563 );
2564}
2565
2570static pm_array_pattern_node_t *
2571pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2572 return pm_array_pattern_node_new(
2573 parser->arena,
2574 ++parser->node_id,
2575 0,
2576 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2577 constant,
2578 ((pm_node_list_t) { 0 }),
2579 NULL,
2580 ((pm_node_list_t) { 0 }),
2581 TOK2LOC(parser, opening),
2582 TOK2LOC(parser, closing)
2583 );
2584}
2585
2590static pm_array_pattern_node_t *
2591pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2592 return pm_array_pattern_node_new(
2593 parser->arena,
2594 ++parser->node_id,
2595 0,
2596 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2597 NULL,
2598 ((pm_node_list_t) { 0 }),
2599 NULL,
2600 ((pm_node_list_t) { 0 }),
2601 TOK2LOC(parser, opening),
2602 TOK2LOC(parser, closing)
2603 );
2604}
2605
2606static PRISM_INLINE void
2607pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2608 pm_node_list_append(arena, &node->requireds, inner);
2609}
2610
2614static pm_assoc_node_t *
2615pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2616 uint32_t end;
2617
2618 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2619 end = PM_NODE_END(value);
2620 } else if (operator != NULL) {
2621 end = PM_TOKEN_END(parser, operator);
2622 } else {
2623 end = PM_NODE_END(key);
2624 }
2625
2626 // Hash string keys will be frozen, so we can mark them as frozen here so
2627 // that the compiler picks them up and also when we check for static literal
2628 // on the keys it gets factored in.
2629 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2630 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2631 }
2632
2633 // If the key and value of this assoc node are both static literals, then
2634 // we can mark this node as a static literal.
2635 pm_node_flags_t flags = 0;
2636 if (
2637 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2638 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2639 ) {
2640 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2641 }
2642
2643 return pm_assoc_node_new(
2644 parser->arena,
2645 ++parser->node_id,
2646 flags,
2647 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2648 key,
2649 value,
2650 NTOK2LOC(parser, operator)
2651 );
2652}
2653
2657static pm_assoc_splat_node_t *
2658pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2659 assert(operator->type == PM_TOKEN_USTAR_STAR);
2660
2661 return pm_assoc_splat_node_new(
2662 parser->arena,
2663 ++parser->node_id,
2664 0,
2665 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2666 value,
2667 TOK2LOC(parser, operator)
2668 );
2669}
2670
2674static pm_back_reference_read_node_t *
2675pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2676 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2677
2678 return pm_back_reference_read_node_new(
2679 parser->arena,
2680 ++parser->node_id,
2681 0,
2682 PM_LOCATION_INIT_TOKEN(parser, name),
2683 pm_parser_constant_id_token(parser, name)
2684 );
2685}
2686
2690static pm_begin_node_t *
2691pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2692 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2693 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2694
2695 return pm_begin_node_new(
2696 parser->arena,
2697 ++parser->node_id,
2698 0,
2699 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2700 NTOK2LOC(parser, begin_keyword),
2701 statements,
2702 NULL,
2703 NULL,
2704 NULL,
2705 ((pm_location_t) { 0 })
2706 );
2707}
2708
2712static void
2713pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2714 if (node->begin_keyword_loc.length == 0) {
2715 PM_NODE_START_SET_NODE(node, rescue_clause);
2716 }
2717 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2718 node->rescue_clause = rescue_clause;
2719}
2720
2724static void
2725pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2726 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2727 PM_NODE_START_SET_NODE(node, else_clause);
2728 }
2729 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2730 node->else_clause = else_clause;
2731}
2732
2736static void
2737pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2738 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2739 PM_NODE_START_SET_NODE(node, ensure_clause);
2740 }
2741 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2742 node->ensure_clause = ensure_clause;
2743}
2744
2748static void
2749pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2750 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2751 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2752 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2753}
2754
2758static pm_block_argument_node_t *
2759pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2760 assert(operator->type == PM_TOKEN_UAMPERSAND);
2761
2762 return pm_block_argument_node_new(
2763 parser->arena,
2764 ++parser->node_id,
2765 0,
2766 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2767 expression,
2768 TOK2LOC(parser, operator)
2769 );
2770}
2771
2775static pm_block_node_t *
2776pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2777 return pm_block_node_new(
2778 parser->arena,
2779 ++parser->node_id,
2780 0,
2781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2782 *locals,
2783 parameters,
2784 body,
2785 TOK2LOC(parser, opening),
2786 TOK2LOC(parser, closing)
2787 );
2788}
2789
2793static pm_block_parameter_node_t *
2794pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2795 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2796
2797 return pm_block_parameter_node_new(
2798 parser->arena,
2799 ++parser->node_id,
2800 0,
2801 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
2802 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2803 NTOK2LOC(parser, name),
2804 TOK2LOC(parser, operator)
2805 );
2806}
2807
2811static pm_block_parameters_node_t *
2812pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2813 uint32_t start;
2814 if (opening != NULL) {
2815 start = PM_TOKEN_START(parser, opening);
2816 } else if (parameters != NULL) {
2817 start = PM_NODE_START(parameters);
2818 } else {
2819 start = 0;
2820 }
2821
2822 uint32_t end;
2823 if (parameters != NULL) {
2824 end = PM_NODE_END(parameters);
2825 } else if (opening != NULL) {
2826 end = PM_TOKEN_END(parser, opening);
2827 } else {
2828 end = 0;
2829 }
2830
2831 return pm_block_parameters_node_new(
2832 parser->arena,
2833 ++parser->node_id,
2834 0,
2835 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2836 parameters,
2837 ((pm_node_list_t) { 0 }),
2838 NTOK2LOC(parser, opening),
2839 ((pm_location_t) { 0 })
2840 );
2841}
2842
2846static void
2847pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2848 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2849 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2850 node->closing_loc = TOK2LOC(parser, closing);
2851}
2852
2856static pm_block_local_variable_node_t *
2857pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2858 return pm_block_local_variable_node_new(
2859 parser->arena,
2860 ++parser->node_id,
2861 0,
2862 PM_LOCATION_INIT_TOKEN(parser, name),
2863 pm_parser_constant_id_token(parser, name)
2864 );
2865}
2866
2870static void
2871pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2872 pm_node_list_append(arena, &node->locals, UP(local));
2873
2874 if (PM_NODE_LENGTH(node) == 0) {
2875 PM_NODE_START_SET_NODE(node, local);
2876 }
2877
2878 PM_NODE_LENGTH_SET_NODE(node, local);
2879}
2880
2884static pm_break_node_t *
2885pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2886 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2887
2888 return pm_break_node_new(
2889 parser->arena,
2890 ++parser->node_id,
2891 0,
2892 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2893 arguments,
2894 TOK2LOC(parser, keyword)
2895 );
2896}
2897
2898// There are certain flags that we want to use internally but don't want to
2899// expose because they are not relevant beyond parsing. Therefore we'll define
2900// them here and not define them in config.yml/a header file.
2901static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2902
2903static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2904static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2905static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2906
2912static pm_call_node_t *
2913pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2914 return pm_call_node_new(
2915 parser->arena,
2916 ++parser->node_id,
2917 flags,
2918 PM_LOCATION_INIT_UNSET,
2919 NULL,
2920 ((pm_location_t) { 0 }),
2921 0,
2922 ((pm_location_t) { 0 }),
2923 ((pm_location_t) { 0 }),
2924 NULL,
2925 ((pm_location_t) { 0 }),
2926 ((pm_location_t) { 0 }),
2927 NULL
2928 );
2929}
2930
2935static PRISM_INLINE pm_node_flags_t
2936pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2937 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2938}
2939
2944static pm_call_node_t *
2945pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2946 pm_assert_value_expression(parser, receiver);
2947
2948 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2949 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2950 flags |= PM_CALL_NODE_FLAGS_INDEX;
2951 }
2952
2953 pm_call_node_t *node = pm_call_node_create(parser, flags);
2954
2955 PM_NODE_START_SET_NODE(node, receiver);
2956
2957 const pm_location_t *end = pm_arguments_end(arguments);
2958 assert(end != NULL && "unreachable");
2959 PM_NODE_LENGTH_SET_LOCATION(node, end);
2960
2961 node->receiver = receiver;
2962 node->message_loc.start = arguments->opening_loc.start;
2963 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2964
2965 node->opening_loc = arguments->opening_loc;
2966 node->arguments = arguments->arguments;
2967 node->closing_loc = arguments->closing_loc;
2968 node->block = arguments->block;
2969
2970 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2971 return node;
2972}
2973
2977static pm_call_node_t *
2978pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2979 pm_assert_value_expression(parser, receiver);
2980 pm_assert_value_expression(parser, argument);
2981
2982 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2983
2984 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2985 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2986
2987 node->receiver = receiver;
2988 node->message_loc = TOK2LOC(parser, operator);
2989
2990 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2991 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2992 node->arguments = arguments;
2993
2994 node->name = pm_parser_constant_id_token(parser, operator);
2995 return node;
2996}
2997
2998static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2999
3003static pm_call_node_t *
3004pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
3005 pm_assert_value_expression(parser, receiver);
3006
3007 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3008
3009 PM_NODE_START_SET_NODE(node, receiver);
3010 const pm_location_t *end = pm_arguments_end(arguments);
3011 if (end == NULL) {
3012 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
3013 } else {
3014 PM_NODE_LENGTH_SET_LOCATION(node, end);
3015 }
3016
3017 node->receiver = receiver;
3018 node->call_operator_loc = TOK2LOC(parser, operator);
3019 node->message_loc = TOK2LOC(parser, message);
3020 node->opening_loc = arguments->opening_loc;
3021 node->arguments = arguments->arguments;
3022 node->closing_loc = arguments->closing_loc;
3023 node->block = arguments->block;
3024
3025 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3026 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3027 }
3028
3033 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
3034 return node;
3035}
3036
3040static pm_call_node_t *
3041pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
3042 pm_call_node_t *node = pm_call_node_create(parser, 0);
3043 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
3044
3045 node->receiver = receiver;
3046 node->arguments = arguments;
3047
3048 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
3049 return node;
3050}
3051
3056static pm_call_node_t *
3057pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
3058 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3059
3060 PM_NODE_START_SET_TOKEN(parser, node, message);
3061 const pm_location_t *end = pm_arguments_end(arguments);
3062 assert(end != NULL && "unreachable");
3063 PM_NODE_LENGTH_SET_LOCATION(node, end);
3064
3065 node->message_loc = TOK2LOC(parser, message);
3066 node->opening_loc = arguments->opening_loc;
3067 node->arguments = arguments->arguments;
3068 node->closing_loc = arguments->closing_loc;
3069 node->block = arguments->block;
3070
3071 node->name = pm_parser_constant_id_token(parser, message);
3072 return node;
3073}
3074
3079static pm_call_node_t *
3080pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
3081 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3082
3083 node->base.location = (pm_location_t) { 0 };
3084 node->arguments = arguments;
3085
3086 node->name = name;
3087 return node;
3088}
3089
3093static pm_call_node_t *
3094pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
3095 pm_assert_value_expression(parser, receiver);
3096 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
3097
3098 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
3099
3100 PM_NODE_START_SET_TOKEN(parser, node, message);
3101 if (arguments->closing_loc.length > 0) {
3102 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
3103 } else {
3104 assert(receiver != NULL);
3105 PM_NODE_LENGTH_SET_NODE(node, receiver);
3106 }
3107
3108 node->receiver = receiver;
3109 node->message_loc = TOK2LOC(parser, message);
3110 node->opening_loc = arguments->opening_loc;
3111 node->arguments = arguments->arguments;
3112 node->closing_loc = arguments->closing_loc;
3113
3114 node->name = pm_parser_constant_id_constant(parser, "!", 1);
3115 return node;
3116}
3117
3121static pm_call_node_t *
3122pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
3123 pm_assert_value_expression(parser, receiver);
3124
3125 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3126
3127 PM_NODE_START_SET_NODE(node, receiver);
3128 const pm_location_t *end = pm_arguments_end(arguments);
3129 assert(end != NULL && "unreachable");
3130 PM_NODE_LENGTH_SET_LOCATION(node, end);
3131
3132 node->receiver = receiver;
3133 node->call_operator_loc = TOK2LOC(parser, operator);
3134 node->opening_loc = arguments->opening_loc;
3135 node->arguments = arguments->arguments;
3136 node->closing_loc = arguments->closing_loc;
3137 node->block = arguments->block;
3138
3139 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3140 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3141 }
3142
3143 node->name = pm_parser_constant_id_constant(parser, "call", 4);
3144 return node;
3145}
3146
3150static pm_call_node_t *
3151pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
3152 pm_assert_value_expression(parser, receiver);
3153
3154 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3155
3156 PM_NODE_START_SET_TOKEN(parser, node, operator);
3157 PM_NODE_LENGTH_SET_NODE(node, receiver);
3158
3159 node->receiver = receiver;
3160 node->message_loc = TOK2LOC(parser, operator);
3161
3162 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
3163 return node;
3164}
3165
3170static pm_call_node_t *
3171pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
3172 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3173
3174 node->base.location = TOK2LOC(parser, message);
3175 node->message_loc = TOK2LOC(parser, message);
3176
3177 node->name = pm_parser_constant_id_token(parser, message);
3178 return node;
3179}
3180
3185static PRISM_INLINE bool
3186pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
3187 return (
3188 (node->message_loc.length > 0) &&
3189 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
3190 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
3191 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
3192 (node->opening_loc.length == 0) &&
3193 (node->arguments == NULL) &&
3194 (node->block == NULL)
3195 );
3196}
3197
3201static void
3202pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
3203 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
3204
3205 if (write_constant->length > 0) {
3206 size_t length = write_constant->length - 1;
3207
3208 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
3209 memcpy(memory, write_constant->start, length);
3210
3211 *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
3212 } else {
3213 // We can get here if the message was missing because of a syntax error.
3214 *read_name = pm_parser_constant_id_constant(parser, "", 0);
3215 }
3216}
3217
3221static pm_call_and_write_node_t *
3222pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3223 assert(target->block == NULL);
3224 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3225
3226 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
3227 parser->arena,
3228 ++parser->node_id,
3229 FL(target),
3230 PM_LOCATION_INIT_NODES(target, value),
3231 target->receiver,
3232 target->call_operator_loc,
3233 target->message_loc,
3234 0,
3235 target->name,
3236 TOK2LOC(parser, operator),
3237 value
3238 );
3239
3240 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3241
3242 // The target is no longer necessary because we've reused its children.
3243 // It is arena-allocated so no explicit free is needed.
3244
3245 return node;
3246}
3247
3252static void
3253pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
3254 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
3255 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
3256 pm_node_t *node;
3257 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
3258 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
3259 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
3260 break;
3261 }
3262 }
3263 }
3264
3265 if (block != NULL) {
3266 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3267 }
3268 }
3269}
3270
3274static pm_index_and_write_node_t *
3275pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3276 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3277
3278 pm_index_arguments_check(parser, target->arguments, target->block);
3279
3280 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3281
3282 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
3283 parser->arena,
3284 ++parser->node_id,
3285 FL(target),
3286 PM_LOCATION_INIT_NODES(target, value),
3287 target->receiver,
3288 target->call_operator_loc,
3289 target->opening_loc,
3290 target->arguments,
3291 target->closing_loc,
3292 (pm_block_argument_node_t *) target->block,
3293 TOK2LOC(parser, operator),
3294 value
3295 );
3296
3297 // The target is no longer necessary because we've reused its children.
3298 // It is arena-allocated so no explicit free is needed.
3299
3300 return node;
3301}
3302
3306static pm_call_operator_write_node_t *
3307pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(target->block == NULL);
3309
3310 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3311 parser->arena,
3312 ++parser->node_id,
3313 FL(target),
3314 PM_LOCATION_INIT_NODES(target, value),
3315 target->receiver,
3316 target->call_operator_loc,
3317 target->message_loc,
3318 0,
3319 target->name,
3320 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3321 TOK2LOC(parser, operator),
3322 value
3323 );
3324
3325 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3326
3327 // The target is no longer necessary because we've reused its children.
3328 // It is arena-allocated so no explicit free is needed.
3329
3330 return node;
3331}
3332
3336static pm_index_operator_write_node_t *
3337pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3338 pm_index_arguments_check(parser, target->arguments, target->block);
3339
3340 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3341
3342 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3343 parser->arena,
3344 ++parser->node_id,
3345 FL(target),
3346 PM_LOCATION_INIT_NODES(target, value),
3347 target->receiver,
3348 target->call_operator_loc,
3349 target->opening_loc,
3350 target->arguments,
3351 target->closing_loc,
3352 (pm_block_argument_node_t *) target->block,
3353 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3354 TOK2LOC(parser, operator),
3355 value
3356 );
3357
3358 // The target is no longer necessary because we've reused its children.
3359 // It is arena-allocated so no explicit free is needed.
3360
3361 return node;
3362}
3363
3367static pm_call_or_write_node_t *
3368pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3369 assert(target->block == NULL);
3370 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3371
3372 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3373 parser->arena,
3374 ++parser->node_id,
3375 FL(target),
3376 PM_LOCATION_INIT_NODES(target, value),
3377 target->receiver,
3378 target->call_operator_loc,
3379 target->message_loc,
3380 0,
3381 target->name,
3382 TOK2LOC(parser, operator),
3383 value
3384 );
3385
3386 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3387
3388 // The target is no longer necessary because we've reused its children.
3389 // It is arena-allocated so no explicit free is needed.
3390
3391 return node;
3392}
3393
3397static pm_index_or_write_node_t *
3398pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3399 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3400
3401 pm_index_arguments_check(parser, target->arguments, target->block);
3402
3403 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3404
3405 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3406 parser->arena,
3407 ++parser->node_id,
3408 FL(target),
3409 PM_LOCATION_INIT_NODES(target, value),
3410 target->receiver,
3411 target->call_operator_loc,
3412 target->opening_loc,
3413 target->arguments,
3414 target->closing_loc,
3415 (pm_block_argument_node_t *) target->block,
3416 TOK2LOC(parser, operator),
3417 value
3418 );
3419
3420 // The target is no longer necessary because we've reused its children.
3421 // It is arena-allocated so no explicit free is needed.
3422
3423 return node;
3424}
3425
3430static pm_call_target_node_t *
3431pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3432 pm_call_target_node_t *node = pm_call_target_node_new(
3433 parser->arena,
3434 ++parser->node_id,
3435 FL(target),
3436 PM_LOCATION_INIT_NODE(target),
3437 target->receiver,
3438 target->call_operator_loc,
3439 target->name,
3440 target->message_loc
3441 );
3442
3443 /* It is possible to get here where we have parsed an invalid syntax tree
3444 * where the call operator was not present. In that case we will have a
3445 * problem because it is a required location. In this case we need to fill
3446 * it in with a fake location so that the syntax tree remains valid. */
3447 if (node->call_operator_loc.length == 0) {
3448 node->call_operator_loc = target->base.location;
3449 }
3450
3451 // The target is no longer necessary because we've reused its children.
3452 // It is arena-allocated so no explicit free is needed.
3453
3454 return node;
3455}
3456
3461static pm_index_target_node_t *
3462pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3463 pm_index_arguments_check(parser, target->arguments, target->block);
3464 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3465
3466 pm_index_target_node_t *node = pm_index_target_node_new(
3467 parser->arena,
3468 ++parser->node_id,
3469 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3470 PM_LOCATION_INIT_NODE(target),
3471 target->receiver,
3472 target->opening_loc,
3473 target->arguments,
3474 target->closing_loc,
3475 (pm_block_argument_node_t *) target->block
3476 );
3477
3478 // The target is no longer necessary because we've reused its children.
3479 // It is arena-allocated so no explicit free is needed.
3480
3481 return node;
3482}
3483
3487static pm_capture_pattern_node_t *
3488pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3489 return pm_capture_pattern_node_new(
3490 parser->arena,
3491 ++parser->node_id,
3492 0,
3493 PM_LOCATION_INIT_NODES(value, target),
3494 value,
3495 target,
3496 TOK2LOC(parser, operator)
3497 );
3498}
3499
3503static pm_case_node_t *
3504pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3505 return pm_case_node_new(
3506 parser->arena,
3507 ++parser->node_id,
3508 0,
3509 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3510 predicate,
3511 ((pm_node_list_t) { 0 }),
3512 NULL,
3513 TOK2LOC(parser, case_keyword),
3514 NTOK2LOC(parser, end_keyword)
3515 );
3516}
3517
3521static void
3522pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3523 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3524
3525 pm_node_list_append(arena, &node->conditions, condition);
3526 PM_NODE_LENGTH_SET_NODE(node, condition);
3527}
3528
3532static void
3533pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3534 node->else_clause = else_clause;
3535 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3536}
3537
3541static void
3542pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3543 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3544 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3545}
3546
3550static pm_case_match_node_t *
3551pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3552 return pm_case_match_node_new(
3553 parser->arena,
3554 ++parser->node_id,
3555 0,
3556 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3557 predicate,
3558 ((pm_node_list_t) { 0 }),
3559 NULL,
3560 TOK2LOC(parser, case_keyword),
3561 ((pm_location_t) { 0 })
3562 );
3563}
3564
3568static void
3569pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3570 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3571
3572 pm_node_list_append(arena, &node->conditions, condition);
3573 PM_NODE_LENGTH_SET_NODE(node, condition);
3574}
3575
3579static void
3580pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3581 node->else_clause = else_clause;
3582 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3583}
3584
3588static void
3589pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3590 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3591 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3592}
3593
3597static pm_class_node_t *
3598pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3599 return pm_class_node_new(
3600 parser->arena,
3601 ++parser->node_id,
3602 0,
3603 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3604 *locals,
3605 TOK2LOC(parser, class_keyword),
3606 constant_path,
3607 NTOK2LOC(parser, inheritance_operator),
3608 superclass,
3609 body,
3610 TOK2LOC(parser, end_keyword),
3611 pm_parser_constant_id_token(parser, name)
3612 );
3613}
3614
3618static pm_class_variable_and_write_node_t *
3619pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3620 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3621
3622 return pm_class_variable_and_write_node_new(
3623 parser->arena,
3624 ++parser->node_id,
3625 0,
3626 PM_LOCATION_INIT_NODES(target, value),
3627 target->name,
3628 target->base.location,
3629 TOK2LOC(parser, operator),
3630 value
3631 );
3632}
3633
3637static pm_class_variable_operator_write_node_t *
3638pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3639 return pm_class_variable_operator_write_node_new(
3640 parser->arena,
3641 ++parser->node_id,
3642 0,
3643 PM_LOCATION_INIT_NODES(target, value),
3644 target->name,
3645 target->base.location,
3646 TOK2LOC(parser, operator),
3647 value,
3648 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3649 );
3650}
3651
3655static pm_class_variable_or_write_node_t *
3656pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3657 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3658
3659 return pm_class_variable_or_write_node_new(
3660 parser->arena,
3661 ++parser->node_id,
3662 0,
3663 PM_LOCATION_INIT_NODES(target, value),
3664 target->name,
3665 target->base.location,
3666 TOK2LOC(parser, operator),
3667 value
3668 );
3669}
3670
3674static pm_class_variable_read_node_t *
3675pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3676 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3677
3678 return pm_class_variable_read_node_new(
3679 parser->arena,
3680 ++parser->node_id,
3681 0,
3682 PM_LOCATION_INIT_TOKEN(parser, token),
3683 pm_parser_constant_id_token(parser, token)
3684 );
3685}
3686
3693static PRISM_INLINE pm_node_flags_t
3694pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3695 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3696 return flags;
3697 }
3698 return 0;
3699}
3700
3704static pm_class_variable_write_node_t *
3705pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3706 return pm_class_variable_write_node_new(
3707 parser->arena,
3708 ++parser->node_id,
3709 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3710 PM_LOCATION_INIT_NODES(read_node, value),
3711 read_node->name,
3712 read_node->base.location,
3713 value,
3714 TOK2LOC(parser, operator)
3715 );
3716}
3717
3721static pm_constant_path_and_write_node_t *
3722pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3723 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3724
3725 return pm_constant_path_and_write_node_new(
3726 parser->arena,
3727 ++parser->node_id,
3728 0,
3729 PM_LOCATION_INIT_NODES(target, value),
3730 target,
3731 TOK2LOC(parser, operator),
3732 value
3733 );
3734}
3735
3739static pm_constant_path_operator_write_node_t *
3740pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3741 return pm_constant_path_operator_write_node_new(
3742 parser->arena,
3743 ++parser->node_id,
3744 0,
3745 PM_LOCATION_INIT_NODES(target, value),
3746 target,
3747 TOK2LOC(parser, operator),
3748 value,
3749 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3750 );
3751}
3752
3756static pm_constant_path_or_write_node_t *
3757pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3758 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3759
3760 return pm_constant_path_or_write_node_new(
3761 parser->arena,
3762 ++parser->node_id,
3763 0,
3764 PM_LOCATION_INIT_NODES(target, value),
3765 target,
3766 TOK2LOC(parser, operator),
3767 value
3768 );
3769}
3770
3774static pm_constant_path_node_t *
3775pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3776 pm_assert_value_expression(parser, parent);
3777
3778 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3779 if (name_token->type == PM_TOKEN_CONSTANT) {
3780 name = pm_parser_constant_id_token(parser, name_token);
3781 }
3782
3783 return pm_constant_path_node_new(
3784 parser->arena,
3785 ++parser->node_id,
3786 0,
3787 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3788 parent,
3789 name,
3790 TOK2LOC(parser, delimiter),
3791 TOK2LOC(parser, name_token)
3792 );
3793}
3794
3798static pm_constant_path_write_node_t *
3799pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3800 return pm_constant_path_write_node_new(
3801 parser->arena,
3802 ++parser->node_id,
3803 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3804 PM_LOCATION_INIT_NODES(target, value),
3805 target,
3806 TOK2LOC(parser, operator),
3807 value
3808 );
3809}
3810
3814static pm_constant_and_write_node_t *
3815pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3816 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3817
3818 return pm_constant_and_write_node_new(
3819 parser->arena,
3820 ++parser->node_id,
3821 0,
3822 PM_LOCATION_INIT_NODES(target, value),
3823 target->name,
3824 target->base.location,
3825 TOK2LOC(parser, operator),
3826 value
3827 );
3828}
3829
3833static pm_constant_operator_write_node_t *
3834pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3835 return pm_constant_operator_write_node_new(
3836 parser->arena,
3837 ++parser->node_id,
3838 0,
3839 PM_LOCATION_INIT_NODES(target, value),
3840 target->name,
3841 target->base.location,
3842 TOK2LOC(parser, operator),
3843 value,
3844 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3845 );
3846}
3847
3851static pm_constant_or_write_node_t *
3852pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3853 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3854
3855 return pm_constant_or_write_node_new(
3856 parser->arena,
3857 ++parser->node_id,
3858 0,
3859 PM_LOCATION_INIT_NODES(target, value),
3860 target->name,
3861 target->base.location,
3862 TOK2LOC(parser, operator),
3863 value
3864 );
3865}
3866
3870static pm_constant_read_node_t *
3871pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3872 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3873
3874 return pm_constant_read_node_new(
3875 parser->arena,
3876 ++parser->node_id,
3877 0,
3878 PM_LOCATION_INIT_TOKEN(parser, name),
3879 pm_parser_constant_id_token(parser, name)
3880 );
3881}
3882
3886static pm_constant_write_node_t *
3887pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3888 return pm_constant_write_node_new(
3889 parser->arena,
3890 ++parser->node_id,
3891 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3892 PM_LOCATION_INIT_NODES(target, value),
3893 target->name,
3894 target->base.location,
3895 value,
3896 TOK2LOC(parser, operator)
3897 );
3898}
3899
3903static void
3904pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3905 switch (PM_NODE_TYPE(node)) {
3906 case PM_BEGIN_NODE: {
3907 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3908 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3909 break;
3910 }
3911 case PM_PARENTHESES_NODE: {
3912 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3913 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3914 break;
3915 }
3916 case PM_STATEMENTS_NODE: {
3917 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3918 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3919 break;
3920 }
3921 case PM_ARRAY_NODE:
3922 case PM_FLOAT_NODE:
3923 case PM_IMAGINARY_NODE:
3924 case PM_INTEGER_NODE:
3925 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3926 case PM_INTERPOLATED_STRING_NODE:
3927 case PM_INTERPOLATED_SYMBOL_NODE:
3928 case PM_INTERPOLATED_X_STRING_NODE:
3929 case PM_RATIONAL_NODE:
3930 case PM_REGULAR_EXPRESSION_NODE:
3931 case PM_SOURCE_ENCODING_NODE:
3932 case PM_SOURCE_FILE_NODE:
3933 case PM_SOURCE_LINE_NODE:
3934 case PM_STRING_NODE:
3935 case PM_SYMBOL_NODE:
3936 case PM_X_STRING_NODE:
3937 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3938 break;
3939 default:
3940 break;
3941 }
3942}
3943
3947static pm_def_node_t *
3948pm_def_node_create(
3949 pm_parser_t *parser,
3950 pm_constant_id_t name,
3951 const pm_token_t *name_loc,
3952 pm_node_t *receiver,
3953 pm_parameters_node_t *parameters,
3954 pm_node_t *body,
3955 pm_constant_id_list_t *locals,
3956 const pm_token_t *def_keyword,
3957 const pm_token_t *operator,
3958 const pm_token_t *lparen,
3959 const pm_token_t *rparen,
3960 const pm_token_t *equal,
3961 const pm_token_t *end_keyword
3962) {
3963 if (receiver != NULL) {
3964 pm_def_node_receiver_check(parser, receiver);
3965 }
3966
3967 return pm_def_node_new(
3968 parser->arena,
3969 ++parser->node_id,
3970 0,
3971 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3972 name,
3973 TOK2LOC(parser, name_loc),
3974 receiver,
3975 parameters,
3976 body,
3977 *locals,
3978 TOK2LOC(parser, def_keyword),
3979 NTOK2LOC(parser, operator),
3980 NTOK2LOC(parser, lparen),
3981 NTOK2LOC(parser, rparen),
3982 NTOK2LOC(parser, equal),
3983 NTOK2LOC(parser, end_keyword)
3984 );
3985}
3986
3990static pm_defined_node_t *
3991pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3992 return pm_defined_node_new(
3993 parser->arena,
3994 ++parser->node_id,
3995 0,
3996 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3997 NTOK2LOC(parser, lparen),
3998 value,
3999 NTOK2LOC(parser, rparen),
4000 TOK2LOC(parser, keyword)
4001 );
4002}
4003
4007static pm_else_node_t *
4008pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4009 return pm_else_node_new(
4010 parser->arena,
4011 ++parser->node_id,
4012 0,
4013 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
4014 TOK2LOC(parser, else_keyword),
4015 statements,
4016 NTOK2LOC(parser, end_keyword)
4017 );
4018}
4019
4023static pm_embedded_statements_node_t *
4024pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
4025 return pm_embedded_statements_node_new(
4026 parser->arena,
4027 ++parser->node_id,
4028 0,
4029 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4030 TOK2LOC(parser, opening),
4031 statements,
4032 TOK2LOC(parser, closing)
4033 );
4034}
4035
4039static pm_embedded_variable_node_t *
4040pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
4041 return pm_embedded_variable_node_new(
4042 parser->arena,
4043 ++parser->node_id,
4044 0,
4045 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
4046 TOK2LOC(parser, operator),
4047 variable
4048 );
4049}
4050
4054static pm_ensure_node_t *
4055pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4056 return pm_ensure_node_new(
4057 parser->arena,
4058 ++parser->node_id,
4059 0,
4060 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
4061 TOK2LOC(parser, ensure_keyword),
4062 statements,
4063 TOK2LOC(parser, end_keyword)
4064 );
4065}
4066
4070static pm_false_node_t *
4071pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4072 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4073
4074 return pm_false_node_new(
4075 parser->arena,
4076 ++parser->node_id,
4077 PM_NODE_FLAG_STATIC_LITERAL,
4078 PM_LOCATION_INIT_TOKEN(parser, token)
4079 );
4080}
4081
4086static pm_find_pattern_node_t *
4087pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4088 assert(nodes->size >= 2);
4089 pm_node_t *left = nodes->nodes[0];
4090 pm_node_t *right = nodes->nodes[nodes->size - 1];
4091
4092 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4093 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4094
4095 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
4096 parser->arena,
4097 ++parser->node_id,
4098 0,
4099 PM_LOCATION_INIT_NODES(left, right),
4100 NULL,
4101 (pm_splat_node_t *) left,
4102 ((pm_node_list_t) { 0 }),
4103 (pm_splat_node_t *) right,
4104 ((pm_location_t) { 0 }),
4105 ((pm_location_t) { 0 })
4106 );
4107
4108 // For now we're going to just copy over each pointer manually. This could be
4109 // much more efficient, as we could instead resize the node list to only point
4110 // to 1...-1.
4111 for (size_t index = 1; index < nodes->size - 1; index++) {
4112 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
4113 }
4114
4115 return node;
4116}
4117
4122static double
4123pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4124 ptrdiff_t diff = token->end - token->start;
4125 if (diff <= 0) return 0.0;
4126
4127 // First, get a buffer of the content.
4128 size_t length = (size_t) diff;
4129 const size_t buffer_size = sizeof(char) * (length + 1);
4130 char *buffer = xmalloc(buffer_size);
4131 memcpy((void *) buffer, token->start, length);
4132
4133 // Next, determine if we need to replace the decimal point because of
4134 // locale-specific options, and then normalize them if we have to.
4135 char decimal_point = *localeconv()->decimal_point;
4136 if (decimal_point != '.') {
4137 for (size_t index = 0; index < length; index++) {
4138 if (buffer[index] == '.') buffer[index] = decimal_point;
4139 }
4140 }
4141
4142 // Next, handle underscores by removing them from the buffer.
4143 for (size_t index = 0; index < length; index++) {
4144 if (buffer[index] == '_') {
4145 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4146 length--;
4147 }
4148 }
4149
4150 // Null-terminate the buffer so that strtod cannot read off the end.
4151 buffer[length] = '\0';
4152
4153 // Now, call strtod to parse the value. Note that CRuby has their own
4154 // version of strtod which avoids locales. We're okay using the locale-aware
4155 // version because we've already validated through the parser that the token
4156 // is in a valid format.
4157 errno = 0;
4158 char *eptr;
4159 double value = strtod(buffer, &eptr);
4160
4161 // This should never happen, because we've already checked that the token
4162 // is in a valid format. However it's good to be safe.
4163 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4164 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
4165 xfree_sized(buffer, buffer_size);
4166 return 0.0;
4167 }
4168
4169 // If errno is set, then it should only be ERANGE. At this point we need to
4170 // check if it's infinity (it should be).
4171 if (errno == ERANGE && PRISM_ISINF(value)) {
4172 int warn_width;
4173 const char *ellipsis;
4174
4175 if (length > 20) {
4176 warn_width = 20;
4177 ellipsis = "...";
4178 } else {
4179 warn_width = (int) length;
4180 ellipsis = "";
4181 }
4182
4183 pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4184 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4185 }
4186
4187 // Finally we can free the buffer and return the value.
4188 xfree_sized(buffer, buffer_size);
4189 return value;
4190}
4191
4195static pm_float_node_t *
4196pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4197 assert(token->type == PM_TOKEN_FLOAT);
4198
4199 return pm_float_node_new(
4200 parser->arena,
4201 ++parser->node_id,
4202 PM_NODE_FLAG_STATIC_LITERAL,
4203 PM_LOCATION_INIT_TOKEN(parser, token),
4204 pm_double_parse(parser, token)
4205 );
4206}
4207
4211static pm_imaginary_node_t *
4212pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4213 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4214
4215 return pm_imaginary_node_new(
4216 parser->arena,
4217 ++parser->node_id,
4218 PM_NODE_FLAG_STATIC_LITERAL,
4219 PM_LOCATION_INIT_TOKEN(parser, token),
4220 UP(pm_float_node_create(parser, &((pm_token_t) {
4221 .type = PM_TOKEN_FLOAT,
4222 .start = token->start,
4223 .end = token->end - 1
4224 })))
4225 );
4226}
4227
4231static pm_rational_node_t *
4232pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4233 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4234
4235 pm_rational_node_t *node = pm_rational_node_new(
4236 parser->arena,
4237 ++parser->node_id,
4238 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4239 PM_LOCATION_INIT_TOKEN(parser, token),
4240 ((pm_integer_t) { 0 }),
4241 ((pm_integer_t) { 0 })
4242 );
4243
4244 const uint8_t *start = token->start;
4245 const uint8_t *end = token->end - 1; // r
4246
4247 while (start < end && *start == '0') start++; // 0.1 -> .1
4248 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4249
4250 size_t length = (size_t) (end - start);
4251 if (length == 1) {
4252 node->denominator.value = 1;
4253 return node;
4254 }
4255
4256 const uint8_t *point = memchr(start, '.', length);
4257 assert(point && "should have a decimal point");
4258
4259 uint8_t *digits = xmalloc(length);
4260 if (digits == NULL) {
4261 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4262 abort();
4263 }
4264
4265 memcpy(digits, start, (unsigned long) (point - start));
4266 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4267 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4268
4269 size_t fract_length = 0;
4270 for (const uint8_t *fract = point; fract < end; ++fract) {
4271 if (*fract != '_') ++fract_length;
4272 }
4273 digits[0] = '1';
4274 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
4275 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
4276 xfree_sized(digits, length);
4277
4278 pm_integers_reduce(&node->numerator, &node->denominator);
4279 pm_integer_arena_move(parser->arena, &node->numerator);
4280 pm_integer_arena_move(parser->arena, &node->denominator);
4281 return node;
4282}
4283
4288static pm_imaginary_node_t *
4289pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4290 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4291
4292 return pm_imaginary_node_new(
4293 parser->arena,
4294 ++parser->node_id,
4295 PM_NODE_FLAG_STATIC_LITERAL,
4296 PM_LOCATION_INIT_TOKEN(parser, token),
4297 UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4298 .type = PM_TOKEN_FLOAT_RATIONAL,
4299 .start = token->start,
4300 .end = token->end - 1
4301 })))
4302 );
4303}
4304
4308static pm_for_node_t *
4309pm_for_node_create(
4310 pm_parser_t *parser,
4311 pm_node_t *index,
4312 pm_node_t *collection,
4313 pm_statements_node_t *statements,
4314 const pm_token_t *for_keyword,
4315 const pm_token_t *in_keyword,
4316 const pm_token_t *do_keyword,
4317 const pm_token_t *end_keyword
4318) {
4319 return pm_for_node_new(
4320 parser->arena,
4321 ++parser->node_id,
4322 0,
4323 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4324 index,
4325 collection,
4326 statements,
4327 TOK2LOC(parser, for_keyword),
4328 TOK2LOC(parser, in_keyword),
4329 NTOK2LOC(parser, do_keyword),
4330 TOK2LOC(parser, end_keyword)
4331 );
4332}
4333
4337static pm_forwarding_arguments_node_t *
4338pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4339 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4340
4341 return pm_forwarding_arguments_node_new(
4342 parser->arena,
4343 ++parser->node_id,
4344 0,
4345 PM_LOCATION_INIT_TOKEN(parser, token)
4346 );
4347}
4348
4352static pm_forwarding_parameter_node_t *
4353pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4354 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4355
4356 return pm_forwarding_parameter_node_new(
4357 parser->arena,
4358 ++parser->node_id,
4359 0,
4360 PM_LOCATION_INIT_TOKEN(parser, token)
4361 );
4362}
4363
4367static pm_forwarding_super_node_t *
4368pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4369 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4370 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4371
4372 pm_block_node_t *block = NULL;
4373 if (arguments->block != NULL) {
4374 block = (pm_block_node_t *) arguments->block;
4375 }
4376
4377 return pm_forwarding_super_node_new(
4378 parser->arena,
4379 ++parser->node_id,
4380 0,
4381 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
4382 PM_LOCATION_INIT_TOKEN(parser, token),
4383 block
4384 );
4385}
4386
4391static pm_hash_pattern_node_t *
4392pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4393 return pm_hash_pattern_node_new(
4394 parser->arena,
4395 ++parser->node_id,
4396 0,
4397 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4398 NULL,
4399 ((pm_node_list_t) { 0 }),
4400 NULL,
4401 TOK2LOC(parser, opening),
4402 TOK2LOC(parser, closing)
4403 );
4404}
4405
4409static pm_hash_pattern_node_t *
4410pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4411 uint32_t start;
4412 uint32_t end;
4413
4414 if (elements->size > 0) {
4415 if (rest) {
4416 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4417 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4418 } else {
4419 start = PM_NODE_START(elements->nodes[0]);
4420 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4421 }
4422 } else {
4423 assert(rest != NULL);
4424 start = PM_NODE_START(rest);
4425 end = PM_NODE_END(rest);
4426 }
4427
4428 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4429 parser->arena,
4430 ++parser->node_id,
4431 0,
4432 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4433 NULL,
4434 ((pm_node_list_t) { 0 }),
4435 rest,
4436 ((pm_location_t) { 0 }),
4437 ((pm_location_t) { 0 })
4438 );
4439
4440 pm_node_list_concat(parser->arena, &node->elements, elements);
4441 return node;
4442}
4443
4447static pm_constant_id_t
4448pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4449 switch (PM_NODE_TYPE(target)) {
4450 case PM_GLOBAL_VARIABLE_READ_NODE:
4451 return ((pm_global_variable_read_node_t *) target)->name;
4452 case PM_BACK_REFERENCE_READ_NODE:
4453 return ((pm_back_reference_read_node_t *) target)->name;
4454 case PM_NUMBERED_REFERENCE_READ_NODE:
4455 // This will only ever happen in the event of a syntax error, but we
4456 // still need to provide something for the node.
4457 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4458 default:
4459 assert(false && "unreachable");
4460 return (pm_constant_id_t) -1;
4461 }
4462}
4463
4467static pm_global_variable_and_write_node_t *
4468pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4469 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4470
4471 return pm_global_variable_and_write_node_new(
4472 parser->arena,
4473 ++parser->node_id,
4474 0,
4475 PM_LOCATION_INIT_NODES(target, value),
4476 pm_global_variable_write_name(parser, target),
4477 target->location,
4478 TOK2LOC(parser, operator),
4479 value
4480 );
4481}
4482
4486static pm_global_variable_operator_write_node_t *
4487pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4488 return pm_global_variable_operator_write_node_new(
4489 parser->arena,
4490 ++parser->node_id,
4491 0,
4492 PM_LOCATION_INIT_NODES(target, value),
4493 pm_global_variable_write_name(parser, target),
4494 target->location,
4495 TOK2LOC(parser, operator),
4496 value,
4497 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4498 );
4499}
4500
4504static pm_global_variable_or_write_node_t *
4505pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4506 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4507
4508 return pm_global_variable_or_write_node_new(
4509 parser->arena,
4510 ++parser->node_id,
4511 0,
4512 PM_LOCATION_INIT_NODES(target, value),
4513 pm_global_variable_write_name(parser, target),
4514 target->location,
4515 TOK2LOC(parser, operator),
4516 value
4517 );
4518}
4519
4523static pm_global_variable_read_node_t *
4524pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4525 return pm_global_variable_read_node_new(
4526 parser->arena,
4527 ++parser->node_id,
4528 0,
4529 PM_LOCATION_INIT_TOKEN(parser, name),
4530 pm_parser_constant_id_token(parser, name)
4531 );
4532}
4533
4537static pm_global_variable_read_node_t *
4538pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4539 return pm_global_variable_read_node_new(
4540 parser->arena,
4541 ++parser->node_id,
4542 0,
4543 PM_LOCATION_INIT_UNSET,
4544 name
4545 );
4546}
4547
4551static pm_global_variable_write_node_t *
4552pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 return pm_global_variable_write_node_new(
4554 parser->arena,
4555 ++parser->node_id,
4556 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4557 PM_LOCATION_INIT_NODES(target, value),
4558 pm_global_variable_write_name(parser, target),
4559 target->location,
4560 value,
4561 TOK2LOC(parser, operator)
4562 );
4563}
4564
4568static pm_global_variable_write_node_t *
4569pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4570 return pm_global_variable_write_node_new(
4571 parser->arena,
4572 ++parser->node_id,
4573 0,
4574 PM_LOCATION_INIT_UNSET,
4575 name,
4576 ((pm_location_t) { 0 }),
4577 value,
4578 ((pm_location_t) { 0 })
4579 );
4580}
4581
4585static pm_hash_node_t *
4586pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4587 assert(opening != NULL);
4588
4589 return pm_hash_node_new(
4590 parser->arena,
4591 ++parser->node_id,
4592 PM_NODE_FLAG_STATIC_LITERAL,
4593 PM_LOCATION_INIT_TOKEN(parser, opening),
4594 TOK2LOC(parser, opening),
4595 ((pm_node_list_t) { 0 }),
4596 ((pm_location_t) { 0 })
4597 );
4598}
4599
4603static PRISM_INLINE void
4604pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4605 pm_node_list_append(arena, &hash->elements, element);
4606
4607 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4608 if (static_literal) {
4609 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4610 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4611 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4612 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4613 }
4614
4615 if (!static_literal) {
4616 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4617 }
4618}
4619
4620static PRISM_INLINE void
4621pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4622 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4623 hash->closing_loc = TOK2LOC(parser, token);
4624}
4625
4629static pm_if_node_t *
4630pm_if_node_create(pm_parser_t *parser,
4631 const pm_token_t *if_keyword,
4632 pm_node_t *predicate,
4633 const pm_token_t *then_keyword,
4634 pm_statements_node_t *statements,
4635 pm_node_t *subsequent,
4636 const pm_token_t *end_keyword
4637) {
4638 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4639
4640 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4641 uint32_t end;
4642
4643 if (end_keyword != NULL) {
4644 end = PM_TOKEN_END(parser, end_keyword);
4645 } else if (subsequent != NULL) {
4646 end = PM_NODE_END(subsequent);
4647 } else if (pm_statements_node_body_length(statements) != 0) {
4648 end = PM_NODE_END(statements);
4649 } else {
4650 end = PM_NODE_END(predicate);
4651 }
4652
4653 return pm_if_node_new(
4654 parser->arena,
4655 ++parser->node_id,
4656 PM_NODE_FLAG_NEWLINE,
4657 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4658 TOK2LOC(parser, if_keyword),
4659 predicate,
4660 NTOK2LOC(parser, then_keyword),
4661 statements,
4662 subsequent,
4663 NTOK2LOC(parser, end_keyword)
4664 );
4665}
4666
4670static pm_if_node_t *
4671pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4672 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4673
4674 pm_statements_node_t *statements = pm_statements_node_create(parser);
4675 pm_statements_node_body_append(parser, statements, statement, true);
4676
4677 return pm_if_node_new(
4678 parser->arena,
4679 ++parser->node_id,
4680 PM_NODE_FLAG_NEWLINE,
4681 PM_LOCATION_INIT_NODES(statement, predicate),
4682 TOK2LOC(parser, if_keyword),
4683 predicate,
4684 ((pm_location_t) { 0 }),
4685 statements,
4686 NULL,
4687 ((pm_location_t) { 0 })
4688 );
4689}
4690
4694static pm_if_node_t *
4695pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4696 pm_assert_value_expression(parser, predicate);
4697 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4698
4699 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4700 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4701
4702 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4703 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4704
4705 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4706 return pm_if_node_new(
4707 parser->arena,
4708 ++parser->node_id,
4709 PM_NODE_FLAG_NEWLINE,
4710 PM_LOCATION_INIT_NODES(predicate, false_expression),
4711 ((pm_location_t) { 0 }),
4712 predicate,
4713 TOK2LOC(parser, qmark),
4714 if_statements,
4715 UP(else_node),
4716 ((pm_location_t) { 0 })
4717 );
4718}
4719
4720static PRISM_INLINE void
4721pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4722 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4723 node->end_keyword_loc = TOK2LOC(parser, keyword);
4724}
4725
4726static PRISM_INLINE void
4727pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4728 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4729 node->end_keyword_loc = TOK2LOC(parser, keyword);
4730}
4731
4735static pm_implicit_node_t *
4736pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4737 return pm_implicit_node_new(
4738 parser->arena,
4739 ++parser->node_id,
4740 0,
4741 PM_LOCATION_INIT_NODE(value),
4742 value
4743 );
4744}
4745
4749static pm_implicit_rest_node_t *
4750pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4751 assert(token->type == PM_TOKEN_COMMA);
4752
4753 return pm_implicit_rest_node_new(
4754 parser->arena,
4755 ++parser->node_id,
4756 0,
4757 PM_LOCATION_INIT_TOKEN(parser, token)
4758 );
4759}
4760
4764static pm_integer_node_t *
4765pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4766 assert(token->type == PM_TOKEN_INTEGER);
4767
4768 pm_integer_node_t *node = pm_integer_node_new(
4769 parser->arena,
4770 ++parser->node_id,
4771 base | PM_NODE_FLAG_STATIC_LITERAL,
4772 PM_LOCATION_INIT_TOKEN(parser, token),
4773 ((pm_integer_t) { 0 })
4774 );
4775
4776 if (parser->integer.lexed) {
4777 // The value was already computed during lexing.
4778 node->value.value = parser->integer.value;
4779 parser->integer.lexed = false;
4780 } else {
4781 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4782 switch (base) {
4783 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4784 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4785 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4786 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4787 default: assert(false && "unreachable"); break;
4788 }
4789
4790 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4791 pm_integer_arena_move(parser->arena, &node->value);
4792 }
4793
4794 return node;
4795}
4796
4801static pm_imaginary_node_t *
4802pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4803 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4804
4805 return pm_imaginary_node_new(
4806 parser->arena,
4807 ++parser->node_id,
4808 PM_NODE_FLAG_STATIC_LITERAL,
4809 PM_LOCATION_INIT_TOKEN(parser, token),
4810 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4811 .type = PM_TOKEN_INTEGER,
4812 .start = token->start,
4813 .end = token->end - 1
4814 })))
4815 );
4816}
4817
4822static pm_rational_node_t *
4823pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4824 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4825
4826 pm_rational_node_t *node = pm_rational_node_new(
4827 parser->arena,
4828 ++parser->node_id,
4829 base | PM_NODE_FLAG_STATIC_LITERAL,
4830 PM_LOCATION_INIT_TOKEN(parser, token),
4831 ((pm_integer_t) { 0 }),
4832 ((pm_integer_t) { .value = 1 })
4833 );
4834
4835 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4836 switch (base) {
4837 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4838 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4839 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4840 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4841 default: assert(false && "unreachable"); break;
4842 }
4843
4844 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4845 pm_integer_arena_move(parser->arena, &node->numerator);
4846
4847 return node;
4848}
4849
4854static pm_imaginary_node_t *
4855pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4856 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4857
4858 return pm_imaginary_node_new(
4859 parser->arena,
4860 ++parser->node_id,
4861 PM_NODE_FLAG_STATIC_LITERAL,
4862 PM_LOCATION_INIT_TOKEN(parser, token),
4863 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4864 .type = PM_TOKEN_INTEGER_RATIONAL,
4865 .start = token->start,
4866 .end = token->end - 1
4867 })))
4868 );
4869}
4870
4874static pm_in_node_t *
4875pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4876 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4877 uint32_t end;
4878
4879 if (statements != NULL) {
4880 end = PM_NODE_END(statements);
4881 } else if (then_keyword != NULL) {
4882 end = PM_TOKEN_END(parser, then_keyword);
4883 } else {
4884 end = PM_NODE_END(pattern);
4885 }
4886
4887 return pm_in_node_new(
4888 parser->arena,
4889 ++parser->node_id,
4890 0,
4891 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4892 pattern,
4893 statements,
4894 TOK2LOC(parser, in_keyword),
4895 NTOK2LOC(parser, then_keyword)
4896 );
4897}
4898
4902static pm_instance_variable_and_write_node_t *
4903pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4904 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4905
4906 return pm_instance_variable_and_write_node_new(
4907 parser->arena,
4908 ++parser->node_id,
4909 0,
4910 PM_LOCATION_INIT_NODES(target, value),
4911 target->name,
4912 target->base.location,
4913 TOK2LOC(parser, operator),
4914 value
4915 );
4916}
4917
4921static pm_instance_variable_operator_write_node_t *
4922pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4923 return pm_instance_variable_operator_write_node_new(
4924 parser->arena,
4925 ++parser->node_id,
4926 0,
4927 PM_LOCATION_INIT_NODES(target, value),
4928 target->name,
4929 target->base.location,
4930 TOK2LOC(parser, operator),
4931 value,
4932 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4933 );
4934}
4935
4939static pm_instance_variable_or_write_node_t *
4940pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4941 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4942
4943 return pm_instance_variable_or_write_node_new(
4944 parser->arena,
4945 ++parser->node_id,
4946 0,
4947 PM_LOCATION_INIT_NODES(target, value),
4948 target->name,
4949 target->base.location,
4950 TOK2LOC(parser, operator),
4951 value
4952 );
4953}
4954
4958static pm_instance_variable_read_node_t *
4959pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4960 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4961
4962 return pm_instance_variable_read_node_new(
4963 parser->arena,
4964 ++parser->node_id,
4965 0,
4966 PM_LOCATION_INIT_TOKEN(parser, token),
4967 pm_parser_constant_id_token(parser, token)
4968 );
4969}
4970
4975static pm_instance_variable_write_node_t *
4976pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4977 return pm_instance_variable_write_node_new(
4978 parser->arena,
4979 ++parser->node_id,
4980 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4981 PM_LOCATION_INIT_NODES(read_node, value),
4982 read_node->name,
4983 read_node->base.location,
4984 value,
4985 TOK2LOC(parser, operator)
4986 );
4987}
4988
4994static void
4995pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4996 switch (PM_NODE_TYPE(part)) {
4997 case PM_STRING_NODE:
4998 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4999 break;
5000 case PM_EMBEDDED_STATEMENTS_NODE: {
5001 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5002 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5003
5004 if (embedded == NULL) {
5005 // If there are no statements or more than one statement, then
5006 // we lose the static literal flag.
5007 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5008 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5009 // If the embedded statement is a string, then we can keep the
5010 // static literal flag and mark the string as frozen.
5011 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5012 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5013 // If the embedded statement is an interpolated string and it's
5014 // a static literal, then we can keep the static literal flag.
5015 } else {
5016 // Otherwise we lose the static literal flag.
5017 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5018 }
5019
5020 break;
5021 }
5022 case PM_EMBEDDED_VARIABLE_NODE:
5023 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5024 break;
5025 default:
5026 assert(false && "unexpected node type");
5027 break;
5028 }
5029
5030 pm_node_list_append(arena, parts, part);
5031}
5032
5036static pm_interpolated_regular_expression_node_t *
5037pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5038 return pm_interpolated_regular_expression_node_new(
5039 parser->arena,
5040 ++parser->node_id,
5041 PM_NODE_FLAG_STATIC_LITERAL,
5042 PM_LOCATION_INIT_TOKEN(parser, opening),
5043 TOK2LOC(parser, opening),
5044 ((pm_node_list_t) { 0 }),
5045 TOK2LOC(parser, opening)
5046 );
5047}
5048
5049static PRISM_INLINE void
5050pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5051 if (PM_NODE_START(node) > PM_NODE_START(part)) {
5052 PM_NODE_START_SET_NODE(node, part);
5053 }
5054 if (PM_NODE_END(node) < PM_NODE_END(part)) {
5055 PM_NODE_LENGTH_SET_NODE(node, part);
5056 }
5057
5058 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5059}
5060
5061static PRISM_INLINE void
5062pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5063 node->closing_loc = TOK2LOC(parser, closing);
5064 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5065 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
5066}
5067
5091static PRISM_INLINE void
5092pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5093 pm_arena_t *arena = parser->arena;
5094#define CLEAR_FLAGS(node) \
5095 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5096
5097#define MUTABLE_FLAGS(node) \
5098 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5099
5100 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5101 PM_NODE_START_SET_NODE(node, part);
5102 }
5103
5104 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5105 PM_NODE_LENGTH_SET_NODE(node, part);
5106 }
5107
5108 switch (PM_NODE_TYPE(part)) {
5109 case PM_STRING_NODE:
5110 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5111 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5112 // as long as this interpolation only consists of other string literals.
5113 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5114 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5115 }
5116 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5117 break;
5118 case PM_INTERPOLATED_STRING_NODE:
5119 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5120 // If the string that we're concatenating is a static literal,
5121 // then we can keep the static literal flag for this string.
5122 } else {
5123 // Otherwise, we lose the static literal flag here and we should
5124 // also clear the mutability flags.
5125 CLEAR_FLAGS(node);
5126 }
5127 break;
5128 case PM_EMBEDDED_STATEMENTS_NODE: {
5129 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5130 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5131
5132 if (embedded == NULL) {
5133 // If we're embedding multiple statements or no statements, then
5134 // the string is not longer a static literal.
5135 CLEAR_FLAGS(node);
5136 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5137 // If the embedded statement is a string, then we can make that
5138 // string as frozen and static literal, and not touch the static
5139 // literal status of this string.
5140 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5141
5142 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5143 MUTABLE_FLAGS(node);
5144 }
5145 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5146 // If the embedded statement is an interpolated string, but that
5147 // string is marked as static literal, then we can keep our
5148 // static literal status for this string.
5149 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5150 MUTABLE_FLAGS(node);
5151 }
5152 } else {
5153 // In all other cases, we lose the static literal flag here and
5154 // become mutable.
5155 CLEAR_FLAGS(node);
5156 }
5157
5158 break;
5159 }
5160 case PM_EMBEDDED_VARIABLE_NODE:
5161 // Embedded variables clear static literal, which means we also
5162 // should clear the mutability flags.
5163 CLEAR_FLAGS(node);
5164 break;
5165 case PM_X_STRING_NODE:
5166 case PM_INTERPOLATED_X_STRING_NODE:
5167 case PM_SYMBOL_NODE:
5168 case PM_INTERPOLATED_SYMBOL_NODE:
5169 // These will only happen in error cases. But we want to handle it
5170 // here so that we don't fail the assertion.
5171 CLEAR_FLAGS(node);
5172 pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
5173 return;
5174 case PM_ERROR_RECOVERY_NODE:
5175 CLEAR_FLAGS(node);
5176 break;
5177 default:
5178 assert(false && "unexpected node type");
5179 break;
5180 }
5181
5182 pm_node_list_append(arena, &node->parts, part);
5183
5184#undef CLEAR_FLAGS
5185#undef MUTABLE_FLAGS
5186}
5187
5191static pm_interpolated_string_node_t *
5192pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5193 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5194
5195 switch (parser->frozen_string_literal) {
5196 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5197 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5198 break;
5199 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5200 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5201 break;
5202 }
5203
5204 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5205 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5206
5207 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
5208 parser->arena,
5209 ++parser->node_id,
5210 flags,
5211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5212 NTOK2LOC(parser, opening),
5213 ((pm_node_list_t) { 0 }),
5214 NTOK2LOC(parser, closing)
5215 );
5216
5217 if (parts != NULL) {
5218 pm_node_t *part;
5219 PM_NODE_LIST_FOREACH(parts, index, part) {
5220 pm_interpolated_string_node_append(parser, node, part);
5221 }
5222 }
5223
5224 return node;
5225}
5226
5230static void
5231pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5232 node->closing_loc = TOK2LOC(parser, closing);
5233 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5234}
5235
5236static void
5237pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5238 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5239 PM_NODE_START_SET_NODE(node, part);
5240 }
5241
5242 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5243
5244 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5245 PM_NODE_LENGTH_SET_NODE(node, part);
5246 }
5247}
5248
5249static void
5250pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5251 node->closing_loc = TOK2LOC(parser, closing);
5252 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5253}
5254
5258static pm_interpolated_symbol_node_t *
5259pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5260 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5261 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5262
5263 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
5264 parser->arena,
5265 ++parser->node_id,
5266 PM_NODE_FLAG_STATIC_LITERAL,
5267 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5268 NTOK2LOC(parser, opening),
5269 ((pm_node_list_t) { 0 }),
5270 NTOK2LOC(parser, closing)
5271 );
5272
5273 if (parts != NULL) {
5274 pm_node_t *part;
5275 PM_NODE_LIST_FOREACH(parts, index, part) {
5276 pm_interpolated_symbol_node_append(parser->arena, node, part);
5277 }
5278 }
5279
5280 return node;
5281}
5282
5286static pm_interpolated_x_string_node_t *
5287pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5288 return pm_interpolated_x_string_node_new(
5289 parser->arena,
5290 ++parser->node_id,
5291 0,
5292 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5293 TOK2LOC(parser, opening),
5294 ((pm_node_list_t) { 0 }),
5295 TOK2LOC(parser, closing)
5296 );
5297}
5298
5299static PRISM_INLINE void
5300pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5301 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5302 PM_NODE_LENGTH_SET_NODE(node, part);
5303}
5304
5305static PRISM_INLINE void
5306pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5307 node->closing_loc = TOK2LOC(parser, closing);
5308 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5309}
5310
5314static pm_it_local_variable_read_node_t *
5315pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5316 return pm_it_local_variable_read_node_new(
5317 parser->arena,
5318 ++parser->node_id,
5319 0,
5320 PM_LOCATION_INIT_TOKEN(parser, name)
5321 );
5322}
5323
5327static pm_it_parameters_node_t *
5328pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5329 return pm_it_parameters_node_new(
5330 parser->arena,
5331 ++parser->node_id,
5332 0,
5333 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
5334 );
5335}
5336
5340static pm_keyword_hash_node_t *
5341pm_keyword_hash_node_create(pm_parser_t *parser) {
5342 return pm_keyword_hash_node_new(
5343 parser->arena,
5344 ++parser->node_id,
5345 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5346 PM_LOCATION_INIT_UNSET,
5347 ((pm_node_list_t) { 0 })
5348 );
5349}
5350
5354static void
5355pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5356 // If the element being added is not an AssocNode or does not have a symbol
5357 // key, then we want to turn the SYMBOL_KEYS flag off.
5358 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5359 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5360 }
5361
5362 pm_node_list_append(arena, &hash->elements, element);
5363 if (PM_NODE_LENGTH(hash) == 0) {
5364 PM_NODE_START_SET_NODE(hash, element);
5365 }
5366 PM_NODE_LENGTH_SET_NODE(hash, element);
5367}
5368
5372static pm_required_keyword_parameter_node_t *
5373pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5374 return pm_required_keyword_parameter_node_new(
5375 parser->arena,
5376 ++parser->node_id,
5377 0,
5378 PM_LOCATION_INIT_TOKEN(parser, name),
5379 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5380 TOK2LOC(parser, name)
5381 );
5382}
5383
5387static pm_optional_keyword_parameter_node_t *
5388pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5389 return pm_optional_keyword_parameter_node_new(
5390 parser->arena,
5391 ++parser->node_id,
5392 0,
5393 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5394 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5395 TOK2LOC(parser, name),
5396 value
5397 );
5398}
5399
5403static pm_keyword_rest_parameter_node_t *
5404pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5405 return pm_keyword_rest_parameter_node_new(
5406 parser->arena,
5407 ++parser->node_id,
5408 0,
5409 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5410 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5411 NTOK2LOC(parser, name),
5412 TOK2LOC(parser, operator)
5413 );
5414}
5415
5419static pm_lambda_node_t *
5420pm_lambda_node_create(
5421 pm_parser_t *parser,
5422 pm_constant_id_list_t *locals,
5423 const pm_token_t *operator,
5424 const pm_token_t *opening,
5425 const pm_token_t *closing,
5426 pm_node_t *parameters,
5427 pm_node_t *body
5428) {
5429 return pm_lambda_node_new(
5430 parser->arena,
5431 ++parser->node_id,
5432 0,
5433 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5434 *locals,
5435 TOK2LOC(parser, operator),
5436 TOK2LOC(parser, opening),
5437 TOK2LOC(parser, closing),
5438 parameters,
5439 body
5440 );
5441}
5442
5446static pm_local_variable_and_write_node_t *
5447pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5448 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5449 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5450
5451 return pm_local_variable_and_write_node_new(
5452 parser->arena,
5453 ++parser->node_id,
5454 0,
5455 PM_LOCATION_INIT_NODES(target, value),
5456 target->location,
5457 TOK2LOC(parser, operator),
5458 value,
5459 name,
5460 depth
5461 );
5462}
5463
5467static pm_local_variable_operator_write_node_t *
5468pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5469 return pm_local_variable_operator_write_node_new(
5470 parser->arena,
5471 ++parser->node_id,
5472 0,
5473 PM_LOCATION_INIT_NODES(target, value),
5474 target->location,
5475 TOK2LOC(parser, operator),
5476 value,
5477 name,
5478 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5479 depth
5480 );
5481}
5482
5486static pm_local_variable_or_write_node_t *
5487pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5488 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5489 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5490
5491 return pm_local_variable_or_write_node_new(
5492 parser->arena,
5493 ++parser->node_id,
5494 0,
5495 PM_LOCATION_INIT_NODES(target, value),
5496 target->location,
5497 TOK2LOC(parser, operator),
5498 value,
5499 name,
5500 depth
5501 );
5502}
5503
5507static pm_local_variable_read_node_t *
5508pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5509 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5510
5511 return pm_local_variable_read_node_new(
5512 parser->arena,
5513 ++parser->node_id,
5514 0,
5515 PM_LOCATION_INIT_TOKEN(parser, name),
5516 name_id,
5517 depth
5518 );
5519}
5520
5524static pm_local_variable_read_node_t *
5525pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5526 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5527 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5528}
5529
5534static pm_local_variable_read_node_t *
5535pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5536 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5537 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5538}
5539
5543static pm_local_variable_write_node_t *
5544pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5545 return pm_local_variable_write_node_new(
5546 parser->arena,
5547 ++parser->node_id,
5548 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5549 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5550 name,
5551 depth,
5552 *name_loc,
5553 value,
5554 TOK2LOC(parser, operator)
5555 );
5556}
5557
5561static PRISM_INLINE bool
5562pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5563 return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5564}
5565
5570static PRISM_INLINE bool
5571pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5572 return (
5573 (length == 2) &&
5574 (parser->start[start] == '_') &&
5575 (parser->start[start + 1] != '0') &&
5576 pm_char_is_decimal_digit(parser->start[start + 1])
5577 );
5578}
5579
5584static PRISM_INLINE void
5585pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5586 if (pm_token_is_numbered_parameter(parser, start, length)) {
5587 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5588 }
5589}
5590
5595static pm_local_variable_target_node_t *
5596pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5597 pm_refute_numbered_parameter(parser, location->start, location->length);
5598
5599 return pm_local_variable_target_node_new(
5600 parser->arena,
5601 ++parser->node_id,
5602 0,
5603 ((pm_location_t) { .start = location->start, .length = location->length }),
5604 name,
5605 depth
5606 );
5607}
5608
5612static pm_match_predicate_node_t *
5613pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5614 pm_assert_value_expression(parser, value);
5615
5616 return pm_match_predicate_node_new(
5617 parser->arena,
5618 ++parser->node_id,
5619 0,
5620 PM_LOCATION_INIT_NODES(value, pattern),
5621 value,
5622 pattern,
5623 TOK2LOC(parser, operator)
5624 );
5625}
5626
5630static pm_match_required_node_t *
5631pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5632 pm_assert_value_expression(parser, value);
5633
5634 return pm_match_required_node_new(
5635 parser->arena,
5636 ++parser->node_id,
5637 0,
5638 PM_LOCATION_INIT_NODES(value, pattern),
5639 value,
5640 pattern,
5641 TOK2LOC(parser, operator)
5642 );
5643}
5644
5648static pm_match_write_node_t *
5649pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5650 return pm_match_write_node_new(
5651 parser->arena,
5652 ++parser->node_id,
5653 0,
5654 PM_LOCATION_INIT_NODE(call),
5655 call,
5656 ((pm_node_list_t) { 0 })
5657 );
5658}
5659
5663static pm_module_node_t *
5664pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5665 return pm_module_node_new(
5666 parser->arena,
5667 ++parser->node_id,
5668 0,
5669 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5670 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5671 TOK2LOC(parser, module_keyword),
5672 constant_path,
5673 body,
5674 TOK2LOC(parser, end_keyword),
5675 pm_parser_constant_id_token(parser, name)
5676 );
5677}
5678
5682static pm_multi_target_node_t *
5683pm_multi_target_node_create(pm_parser_t *parser) {
5684 return pm_multi_target_node_new(
5685 parser->arena,
5686 ++parser->node_id,
5687 0,
5688 PM_LOCATION_INIT_UNSET,
5689 ((pm_node_list_t) { 0 }),
5690 NULL,
5691 ((pm_node_list_t) { 0 }),
5692 ((pm_location_t) { 0 }),
5693 ((pm_location_t) { 0 })
5694 );
5695}
5696
5700static void
5701pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5702 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5703 if (node->rest == NULL) {
5704 node->rest = target;
5705 } else {
5706 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5707 pm_node_list_append(parser->arena, &node->rights, target);
5708 }
5709 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5710 if (node->rest == NULL) {
5711 node->rest = target;
5712 } else {
5713 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5714 pm_node_list_append(parser->arena, &node->rights, target);
5715 }
5716 } else if (node->rest == NULL) {
5717 pm_node_list_append(parser->arena, &node->lefts, target);
5718 } else {
5719 pm_node_list_append(parser->arena, &node->rights, target);
5720 }
5721
5722 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5723 PM_NODE_START_SET_NODE(node, target);
5724 }
5725
5726 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5727 PM_NODE_LENGTH_SET_NODE(node, target);
5728 }
5729}
5730
5734static void
5735pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5736 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5737 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5738 node->lparen_loc = TOK2LOC(parser, lparen);
5739}
5740
5744static void
5745pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5746 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5747 node->rparen_loc = TOK2LOC(parser, rparen);
5748}
5749
5753static pm_multi_write_node_t *
5754pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5755 /* The target is no longer necessary because we have reused its children. It
5756 * is arena-allocated so no explicit free is needed. */
5757 return pm_multi_write_node_new(
5758 parser->arena,
5759 ++parser->node_id,
5760 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5761 PM_LOCATION_INIT_NODES(target, value),
5762 target->lefts,
5763 target->rest,
5764 target->rights,
5765 target->lparen_loc,
5766 target->rparen_loc,
5767 TOK2LOC(parser, operator),
5768 value
5769 );
5770}
5771
5775static pm_next_node_t *
5776pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5777 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5778
5779 return pm_next_node_new(
5780 parser->arena,
5781 ++parser->node_id,
5782 0,
5783 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5784 arguments,
5785 TOK2LOC(parser, keyword)
5786 );
5787}
5788
5792static pm_nil_node_t *
5793pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5794 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5795
5796 return pm_nil_node_new(
5797 parser->arena,
5798 ++parser->node_id,
5799 PM_NODE_FLAG_STATIC_LITERAL,
5800 PM_LOCATION_INIT_TOKEN(parser, token)
5801 );
5802}
5803
5807static pm_no_block_parameter_node_t *
5808pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5809 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5810 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5811
5812 return pm_no_block_parameter_node_new(
5813 parser->arena,
5814 ++parser->node_id,
5815 0,
5816 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5817 TOK2LOC(parser, operator),
5818 TOK2LOC(parser, keyword)
5819 );
5820}
5821
5825static pm_no_keywords_parameter_node_t *
5826pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5827 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5828 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5829
5830 return pm_no_keywords_parameter_node_new(
5831 parser->arena,
5832 ++parser->node_id,
5833 0,
5834 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5835 TOK2LOC(parser, operator),
5836 TOK2LOC(parser, keyword)
5837 );
5838}
5839
5843static pm_numbered_parameters_node_t *
5844pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5845 return pm_numbered_parameters_node_new(
5846 parser->arena,
5847 ++parser->node_id,
5848 0,
5849 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5850 maximum
5851 );
5852}
5853
5858#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5859
5866static uint32_t
5867pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5868 const uint8_t *start = token->start + 1;
5869 const uint8_t *end = token->end;
5870
5871 ptrdiff_t diff = end - start;
5872 assert(diff > 0);
5873#if PTRDIFF_MAX > SIZE_MAX
5874 assert(diff < (ptrdiff_t) SIZE_MAX);
5875#endif
5876 size_t length = (size_t) diff;
5877
5878 char *digits = xcalloc(length + 1, sizeof(char));
5879 memcpy(digits, start, length);
5880 digits[length] = '\0';
5881
5882 char *endptr;
5883 errno = 0;
5884 unsigned long value = strtoul(digits, &endptr, 10);
5885
5886 if ((digits == endptr) || (*endptr != '\0')) {
5887 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5888 value = 0;
5889 }
5890
5891 xfree_sized(digits, sizeof(char) * (length + 1));
5892
5893 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5894 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5895 value = 0;
5896 }
5897
5898 return (uint32_t) value;
5899}
5900
5901#undef NTH_REF_MAX
5902
5906static pm_numbered_reference_read_node_t *
5907pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5908 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5909
5910 return pm_numbered_reference_read_node_new(
5911 parser->arena,
5912 ++parser->node_id,
5913 0,
5914 PM_LOCATION_INIT_TOKEN(parser, name),
5915 pm_numbered_reference_read_node_number(parser, name)
5916 );
5917}
5918
5922static pm_optional_parameter_node_t *
5923pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5924 return pm_optional_parameter_node_new(
5925 parser->arena,
5926 ++parser->node_id,
5927 0,
5928 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5929 pm_parser_constant_id_token(parser, name),
5930 TOK2LOC(parser, name),
5931 TOK2LOC(parser, operator),
5932 value
5933 );
5934}
5935
5939static pm_or_node_t *
5940pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5941 pm_assert_value_expression(parser, left);
5942
5943 return pm_or_node_new(
5944 parser->arena,
5945 ++parser->node_id,
5946 0,
5947 PM_LOCATION_INIT_NODES(left, right),
5948 left,
5949 right,
5950 TOK2LOC(parser, operator)
5951 );
5952}
5953
5957static pm_parameters_node_t *
5958pm_parameters_node_create(pm_parser_t *parser) {
5959 return pm_parameters_node_new(
5960 parser->arena,
5961 ++parser->node_id,
5962 0,
5963 PM_LOCATION_INIT_UNSET,
5964 ((pm_node_list_t) { 0 }),
5965 ((pm_node_list_t) { 0 }),
5966 NULL,
5967 ((pm_node_list_t) { 0 }),
5968 ((pm_node_list_t) { 0 }),
5969 NULL,
5970 NULL
5971 );
5972}
5973
5977static void
5978pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5979 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5980 PM_NODE_START_SET_NODE(params, param);
5981 }
5982
5983 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5984 PM_NODE_LENGTH_SET_NODE(params, param);
5985 }
5986}
5987
5991static void
5992pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5993 pm_parameters_node_location_set(params, param);
5994 pm_node_list_append(arena, &params->requireds, param);
5995}
5996
6000static void
6001pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6002 pm_parameters_node_location_set(params, UP(param));
6003 pm_node_list_append(arena, &params->optionals, UP(param));
6004}
6005
6009static void
6010pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6011 pm_parameters_node_location_set(params, param);
6012 pm_node_list_append(arena, &params->posts, param);
6013}
6014
6018static void
6019pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6020 pm_parameters_node_location_set(params, param);
6021 params->rest = param;
6022}
6023
6027static void
6028pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6029 pm_parameters_node_location_set(params, param);
6030 pm_node_list_append(arena, &params->keywords, param);
6031}
6032
6036static void
6037pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6038 assert(params->keyword_rest == NULL);
6039 pm_parameters_node_location_set(params, param);
6040 params->keyword_rest = param;
6041}
6042
6046static void
6047pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
6048 assert(params->block == NULL);
6049 pm_parameters_node_location_set(params, param);
6050 params->block = param;
6051}
6052
6056static pm_program_node_t *
6057pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6058 return pm_program_node_new(
6059 parser->arena,
6060 ++parser->node_id,
6061 0,
6062 PM_LOCATION_INIT_NODE(statements),
6063 *locals,
6064 statements
6065 );
6066}
6067
6071static pm_parentheses_node_t *
6072pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6073 return pm_parentheses_node_new(
6074 parser->arena,
6075 ++parser->node_id,
6076 flags,
6077 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6078 body,
6079 TOK2LOC(parser, opening),
6080 TOK2LOC(parser, closing)
6081 );
6082}
6083
6087static pm_pinned_expression_node_t *
6088pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6089 return pm_pinned_expression_node_new(
6090 parser->arena,
6091 ++parser->node_id,
6092 0,
6093 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
6094 expression,
6095 TOK2LOC(parser, operator),
6096 TOK2LOC(parser, lparen),
6097 TOK2LOC(parser, rparen)
6098 );
6099}
6100
6104static pm_pinned_variable_node_t *
6105pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6106 return pm_pinned_variable_node_new(
6107 parser->arena,
6108 ++parser->node_id,
6109 0,
6110 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
6111 variable,
6112 TOK2LOC(parser, operator)
6113 );
6114}
6115
6119static pm_post_execution_node_t *
6120pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6121 return pm_post_execution_node_new(
6122 parser->arena,
6123 ++parser->node_id,
6124 0,
6125 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6126 statements,
6127 TOK2LOC(parser, keyword),
6128 TOK2LOC(parser, opening),
6129 TOK2LOC(parser, closing)
6130 );
6131}
6132
6136static pm_pre_execution_node_t *
6137pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6138 return pm_pre_execution_node_new(
6139 parser->arena,
6140 ++parser->node_id,
6141 0,
6142 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6143 statements,
6144 TOK2LOC(parser, keyword),
6145 TOK2LOC(parser, opening),
6146 TOK2LOC(parser, closing)
6147 );
6148}
6149
6153static pm_range_node_t *
6154pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6155 pm_assert_value_expression(parser, left);
6156 pm_assert_value_expression(parser, right);
6157 pm_node_flags_t flags = 0;
6158
6159 // Indicate that this node is an exclusive range if the operator is `...`.
6160 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6161 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6162 }
6163
6164 // Indicate that this node is a static literal (i.e., can be compiled with
6165 // a putobject in CRuby) if the left and right are implicit nil, explicit
6166 // nil, or integers.
6167 if (
6168 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6169 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6170 ) {
6171 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6172 }
6173
6174 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
6175 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
6176
6177 return pm_range_node_new(
6178 parser->arena,
6179 ++parser->node_id,
6180 flags,
6181 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6182 left,
6183 right,
6184 TOK2LOC(parser, operator)
6185 );
6186}
6187
6191static pm_redo_node_t *
6192pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6193 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6194
6195 return pm_redo_node_new(
6196 parser->arena,
6197 ++parser->node_id,
6198 0,
6199 PM_LOCATION_INIT_TOKEN(parser, token)
6200 );
6201}
6202
6207static pm_regular_expression_node_t *
6208pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6209 return pm_regular_expression_node_new(
6210 parser->arena,
6211 ++parser->node_id,
6212 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6213 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6214 TOK2LOC(parser, opening),
6215 TOK2LOC(parser, content),
6216 TOK2LOC(parser, closing),
6217 *unescaped
6218 );
6219}
6220
6224static PRISM_INLINE pm_regular_expression_node_t *
6225pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6226 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6227}
6228
6232static pm_required_parameter_node_t *
6233pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6234 return pm_required_parameter_node_new(
6235 parser->arena,
6236 ++parser->node_id,
6237 0,
6238 PM_LOCATION_INIT_TOKEN(parser, token),
6239 pm_parser_constant_id_token(parser, token)
6240 );
6241}
6242
6246static pm_rescue_modifier_node_t *
6247pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6248 return pm_rescue_modifier_node_new(
6249 parser->arena,
6250 ++parser->node_id,
6251 0,
6252 PM_LOCATION_INIT_NODES(expression, rescue_expression),
6253 expression,
6254 TOK2LOC(parser, keyword),
6255 rescue_expression
6256 );
6257}
6258
6262static pm_rescue_node_t *
6263pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6264 return pm_rescue_node_new(
6265 parser->arena,
6266 ++parser->node_id,
6267 0,
6268 PM_LOCATION_INIT_TOKEN(parser, keyword),
6269 TOK2LOC(parser, keyword),
6270 ((pm_node_list_t) { 0 }),
6271 ((pm_location_t) { 0 }),
6272 NULL,
6273 ((pm_location_t) { 0 }),
6274 NULL,
6275 NULL
6276 );
6277}
6278
6279static PRISM_INLINE void
6280pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
6281 node->operator_loc = TOK2LOC(parser, operator);
6282}
6283
6287static void
6288pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6289 node->reference = reference;
6290 PM_NODE_LENGTH_SET_NODE(node, reference);
6291}
6292
6296static void
6297pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6298 node->statements = statements;
6299 if (pm_statements_node_body_length(statements) > 0) {
6300 PM_NODE_LENGTH_SET_NODE(node, statements);
6301 }
6302}
6303
6307static void
6308pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6309 node->subsequent = subsequent;
6310 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6311}
6312
6316static void
6317pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6318 pm_node_list_append(arena, &node->exceptions, exception);
6319 PM_NODE_LENGTH_SET_NODE(node, exception);
6320}
6321
6325static pm_rest_parameter_node_t *
6326pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6327 return pm_rest_parameter_node_new(
6328 parser->arena,
6329 ++parser->node_id,
6330 0,
6331 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6332 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6333 NTOK2LOC(parser, name),
6334 TOK2LOC(parser, operator)
6335 );
6336}
6337
6341static pm_retry_node_t *
6342pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6343 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6344
6345 return pm_retry_node_new(
6346 parser->arena,
6347 ++parser->node_id,
6348 0,
6349 PM_LOCATION_INIT_TOKEN(parser, token)
6350 );
6351}
6352
6356static pm_return_node_t *
6357pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6358 return pm_return_node_new(
6359 parser->arena,
6360 ++parser->node_id,
6361 0,
6362 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6363 TOK2LOC(parser, keyword),
6364 arguments
6365 );
6366}
6367
6371static pm_self_node_t *
6372pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6373 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6374
6375 return pm_self_node_new(
6376 parser->arena,
6377 ++parser->node_id,
6378 0,
6379 PM_LOCATION_INIT_TOKEN(parser, token)
6380 );
6381}
6382
6386static pm_shareable_constant_node_t *
6387pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6388 return pm_shareable_constant_node_new(
6389 parser->arena,
6390 ++parser->node_id,
6391 (pm_node_flags_t) value,
6392 PM_LOCATION_INIT_NODE(write),
6393 write
6394 );
6395}
6396
6400static pm_singleton_class_node_t *
6401pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6402 return pm_singleton_class_node_new(
6403 parser->arena,
6404 ++parser->node_id,
6405 0,
6406 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6407 *locals,
6408 TOK2LOC(parser, class_keyword),
6409 TOK2LOC(parser, operator),
6410 expression,
6411 body,
6412 TOK2LOC(parser, end_keyword)
6413 );
6414}
6415
6419static pm_source_encoding_node_t *
6420pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6421 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6422
6423 return pm_source_encoding_node_new(
6424 parser->arena,
6425 ++parser->node_id,
6426 PM_NODE_FLAG_STATIC_LITERAL,
6427 PM_LOCATION_INIT_TOKEN(parser, token)
6428 );
6429}
6430
6434static pm_source_file_node_t*
6435pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6436 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6437
6438 pm_node_flags_t flags = 0;
6439
6440 switch (parser->frozen_string_literal) {
6441 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6442 flags |= PM_STRING_FLAGS_MUTABLE;
6443 break;
6444 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6445 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6446 break;
6447 }
6448
6449 return pm_source_file_node_new(
6450 parser->arena,
6451 ++parser->node_id,
6452 flags,
6453 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
6454 parser->filepath
6455 );
6456}
6457
6461static pm_source_line_node_t *
6462pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6463 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6464
6465 return pm_source_line_node_new(
6466 parser->arena,
6467 ++parser->node_id,
6468 PM_NODE_FLAG_STATIC_LITERAL,
6469 PM_LOCATION_INIT_TOKEN(parser, token)
6470 );
6471}
6472
6476static pm_splat_node_t *
6477pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6478 return pm_splat_node_new(
6479 parser->arena,
6480 ++parser->node_id,
6481 0,
6482 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6483 TOK2LOC(parser, operator),
6484 expression
6485 );
6486}
6487
6491static pm_statements_node_t *
6492pm_statements_node_create(pm_parser_t *parser) {
6493 return pm_statements_node_new(
6494 parser->arena,
6495 ++parser->node_id,
6496 0,
6497 PM_LOCATION_INIT_UNSET,
6498 ((pm_node_list_t) { 0 })
6499 );
6500}
6501
6505static size_t
6506pm_statements_node_body_length(pm_statements_node_t *node) {
6507 return node && node->body.size;
6508}
6509
6514static PRISM_INLINE void
6515pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6516 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6517 PM_NODE_START_SET_NODE(node, statement);
6518 }
6519
6520 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6521 PM_NODE_LENGTH_SET_NODE(node, statement);
6522 }
6523}
6524
6528static void
6529pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6530 pm_statements_node_body_update(node, statement);
6531
6532 if (node->body.size > 0) {
6533 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6534
6535 switch (PM_NODE_TYPE(previous)) {
6536 case PM_BREAK_NODE:
6537 case PM_NEXT_NODE:
6538 case PM_REDO_NODE:
6539 case PM_RETRY_NODE:
6540 case PM_RETURN_NODE:
6541 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6542 break;
6543 default:
6544 break;
6545 }
6546 }
6547
6548 pm_node_list_append(parser->arena, &node->body, statement);
6549 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6550}
6551
6555static void
6556pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6557 pm_statements_node_body_update(node, statement);
6558 pm_node_list_prepend(arena, &node->body, statement);
6559 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6560}
6561
6565static PRISM_INLINE pm_string_node_t *
6566pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6567 pm_node_flags_t flags = 0;
6568
6569 switch (parser->frozen_string_literal) {
6570 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6571 flags = PM_STRING_FLAGS_MUTABLE;
6572 break;
6573 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6574 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6575 break;
6576 }
6577
6578 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6579 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6580
6581 return pm_string_node_new(
6582 parser->arena,
6583 ++parser->node_id,
6584 flags,
6585 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6586 NTOK2LOC(parser, opening),
6587 TOK2LOC(parser, content),
6588 NTOK2LOC(parser, closing),
6589 *string
6590 );
6591}
6592
6596static pm_string_node_t *
6597pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6598 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6599}
6600
6605static pm_string_node_t *
6606pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6608 parser->current_string = PM_STRING_EMPTY;
6609 return node;
6610}
6611
6615static pm_super_node_t *
6616pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6617 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6618
6619 const pm_location_t *end = pm_arguments_end(arguments);
6620 assert(end != NULL && "unreachable");
6621
6622 return pm_super_node_new(
6623 parser->arena,
6624 ++parser->node_id,
6625 0,
6626 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6627 TOK2LOC(parser, keyword),
6628 arguments->opening_loc,
6629 arguments->arguments,
6630 arguments->closing_loc,
6631 arguments->block
6632 );
6633}
6634
6639static bool
6640pm_ascii_only_p(const pm_string_t *contents) {
6641 const size_t length = pm_string_length(contents);
6642 const uint8_t *source = pm_string_source(contents);
6643
6644 for (size_t index = 0; index < length; index++) {
6645 if (source[index] & 0x80) return false;
6646 }
6647
6648 return true;
6649}
6650
6654static void
6655parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6656 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6657 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6658
6659 if (width == 0) {
6660 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6661 break;
6662 }
6663
6664 cursor += width;
6665 }
6666}
6667
6672static void
6673parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6674 const pm_encoding_t *encoding = parser->encoding;
6675
6676 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6677 size_t width = encoding->char_width(cursor, end - cursor);
6678
6679 if (width == 0) {
6680 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6681 break;
6682 }
6683
6684 cursor += width;
6685 }
6686}
6687
6697static PRISM_INLINE pm_node_flags_t
6698parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6699 if (parser->explicit_encoding != NULL) {
6700 // A Symbol may optionally have its encoding explicitly set. This will
6701 // happen if an escape sequence results in a non-ASCII code point.
6702 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6703 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6704 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6705 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6706 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6707 } else if (validate) {
6708 parse_symbol_encoding_validate_other(parser, location, contents);
6709 }
6710 } else if (pm_ascii_only_p(contents)) {
6711 // Ruby stipulates that all source files must use an ASCII-compatible
6712 // encoding. Thus, all symbols appearing in source are eligible for
6713 // "downgrading" to US-ASCII.
6714 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6715 } else if (validate) {
6716 parse_symbol_encoding_validate_other(parser, location, contents);
6717 }
6718
6719 return 0;
6720}
6721
6726static pm_symbol_node_t *
6727pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6728 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6729 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6730
6731 return pm_symbol_node_new(
6732 parser->arena,
6733 ++parser->node_id,
6734 PM_NODE_FLAG_STATIC_LITERAL | flags,
6735 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6736 NTOK2LOC(parser, opening),
6737 NTOK2LOC(parser, value),
6738 NTOK2LOC(parser, closing),
6739 *unescaped
6740 );
6741}
6742
6746static PRISM_INLINE pm_symbol_node_t *
6747pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6748 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6749}
6750
6754static pm_symbol_node_t *
6755pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6756 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6757 parser->current_string = PM_STRING_EMPTY;
6758 return node;
6759}
6760
6764static pm_symbol_node_t *
6765pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6766 assert(token->type == PM_TOKEN_LABEL);
6767
6768 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6769 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6770 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6771
6772 assert((label.end - label.start) >= 0);
6773 pm_string_shared_init(&node->unescaped, label.start, label.end);
6774 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6775
6776 return node;
6777}
6778
6782static pm_symbol_node_t *
6783pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6784 pm_symbol_node_t *node = pm_symbol_node_new(
6785 parser->arena,
6786 ++parser->node_id,
6787 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6788 PM_LOCATION_INIT_UNSET,
6789 ((pm_location_t) { 0 }),
6790 ((pm_location_t) { 0 }),
6791 ((pm_location_t) { 0 }),
6792 ((pm_string_t) { 0 })
6793 );
6794
6795 pm_string_constant_init(&node->unescaped, content, strlen(content));
6796 return node;
6797}
6798
6802static bool
6803pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6804 const pm_location_t *location = NULL;
6805
6806 switch (PM_NODE_TYPE(node)) {
6807 case PM_SYMBOL_NODE: {
6808 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6809 if (cast->closing_loc.length > 0) {
6810 location = &cast->closing_loc;
6811 }
6812 break;
6813 }
6814 case PM_INTERPOLATED_SYMBOL_NODE: {
6815 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6816 if (cast->closing_loc.length > 0) {
6817 location = &cast->closing_loc;
6818 }
6819 break;
6820 }
6821 default:
6822 return false;
6823 }
6824
6825 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6826}
6827
6831static pm_symbol_node_t *
6832pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6833 pm_symbol_node_t *new_node = pm_symbol_node_new(
6834 parser->arena,
6835 ++parser->node_id,
6836 PM_NODE_FLAG_STATIC_LITERAL,
6837 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6838 TOK2LOC(parser, opening),
6839 node->content_loc,
6840 TOK2LOC(parser, closing),
6841 node->unescaped
6842 );
6843
6844 pm_token_t content = {
6845 .type = PM_TOKEN_IDENTIFIER,
6846 .start = parser->start + node->content_loc.start,
6847 .end = parser->start + node->content_loc.start + node->content_loc.length
6848 };
6849
6850 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6851
6852 /* The old node is arena-allocated so no explicit free is needed. */
6853 return new_node;
6854}
6855
6859static pm_string_node_t *
6860pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6861 pm_node_flags_t flags = 0;
6862
6863 switch (parser->frozen_string_literal) {
6864 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6865 flags = PM_STRING_FLAGS_MUTABLE;
6866 break;
6867 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6868 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6869 break;
6870 }
6871
6872 pm_string_node_t *new_node = pm_string_node_new(
6873 parser->arena,
6874 ++parser->node_id,
6875 flags,
6876 PM_LOCATION_INIT_NODE(node),
6877 node->opening_loc,
6878 node->value_loc,
6879 node->closing_loc,
6880 node->unescaped
6881 );
6882
6883 /* The old node is arena-allocated so no explicit free is needed. */
6884 return new_node;
6885}
6886
6890static pm_true_node_t *
6891pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6892 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6893
6894 return pm_true_node_new(
6895 parser->arena,
6896 ++parser->node_id,
6897 PM_NODE_FLAG_STATIC_LITERAL,
6898 PM_LOCATION_INIT_TOKEN(parser, token)
6899 );
6900}
6901
6905static pm_true_node_t *
6906pm_true_node_synthesized_create(pm_parser_t *parser) {
6907 return pm_true_node_new(
6908 parser->arena,
6909 ++parser->node_id,
6910 PM_NODE_FLAG_STATIC_LITERAL,
6911 PM_LOCATION_INIT_UNSET
6912 );
6913}
6914
6918static pm_undef_node_t *
6919pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6920 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6921
6922 return pm_undef_node_new(
6923 parser->arena,
6924 ++parser->node_id,
6925 0,
6926 PM_LOCATION_INIT_TOKEN(parser, token),
6927 ((pm_node_list_t) { 0 }),
6928 TOK2LOC(parser, token)
6929 );
6930}
6931
6935static void
6936pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6937 PM_NODE_LENGTH_SET_NODE(node, name);
6938 pm_node_list_append(arena, &node->names, name);
6939}
6940
6944static pm_unless_node_t *
6945pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6946 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6947 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6948
6949 return pm_unless_node_new(
6950 parser->arena,
6951 ++parser->node_id,
6952 PM_NODE_FLAG_NEWLINE,
6953 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6954 TOK2LOC(parser, keyword),
6955 predicate,
6956 NTOK2LOC(parser, then_keyword),
6957 statements,
6958 NULL,
6959 ((pm_location_t) { 0 })
6960 );
6961}
6962
6966static pm_unless_node_t *
6967pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6968 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6969
6970 pm_statements_node_t *statements = pm_statements_node_create(parser);
6971 pm_statements_node_body_append(parser, statements, statement, true);
6972
6973 return pm_unless_node_new(
6974 parser->arena,
6975 ++parser->node_id,
6976 PM_NODE_FLAG_NEWLINE,
6977 PM_LOCATION_INIT_NODES(statement, predicate),
6978 TOK2LOC(parser, unless_keyword),
6979 predicate,
6980 ((pm_location_t) { 0 }),
6981 statements,
6982 NULL,
6983 ((pm_location_t) { 0 })
6984 );
6985}
6986
6987static PRISM_INLINE void
6988pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6989 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6990 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6991}
6992
6998static void
6999pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7000 assert(parser->current_block_exits != NULL);
7001
7002 // All of the block exits that we want to remove should be within the
7003 // statements, and since we are modifying the statements, we shouldn't have
7004 // to check the end location.
7005 uint32_t start = statements->base.location.start;
7006
7007 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7008 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7009 if (block_exit->location.start < start) break;
7010
7011 // Implicitly remove from the list by lowering the size.
7012 parser->current_block_exits->size--;
7013 }
7014}
7015
7019static pm_until_node_t *
7020pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7021 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7022
7023 return pm_until_node_new(
7024 parser->arena,
7025 ++parser->node_id,
7026 flags,
7027 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7028 TOK2LOC(parser, keyword),
7029 NTOK2LOC(parser, do_keyword),
7030 TOK2LOC(parser, closing),
7031 predicate,
7032 statements
7033 );
7034}
7035
7039static pm_until_node_t *
7040pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7041 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7042 pm_loop_modifier_block_exits(parser, statements);
7043
7044 return pm_until_node_new(
7045 parser->arena,
7046 ++parser->node_id,
7047 flags,
7048 PM_LOCATION_INIT_NODES(statements, predicate),
7049 TOK2LOC(parser, keyword),
7050 ((pm_location_t) { 0 }),
7051 ((pm_location_t) { 0 }),
7052 predicate,
7053 statements
7054 );
7055}
7056
7060static pm_when_node_t *
7061pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7062 return pm_when_node_new(
7063 parser->arena,
7064 ++parser->node_id,
7065 0,
7066 PM_LOCATION_INIT_TOKEN(parser, keyword),
7067 TOK2LOC(parser, keyword),
7068 ((pm_node_list_t) { 0 }),
7069 ((pm_location_t) { 0 }),
7070 NULL
7071 );
7072}
7073
7077static void
7078pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
7079 PM_NODE_LENGTH_SET_NODE(node, condition);
7080 pm_node_list_append(arena, &node->conditions, condition);
7081}
7082
7086static PRISM_INLINE void
7087pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
7088 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
7089 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
7090}
7091
7095static void
7096pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7097 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
7098 PM_NODE_LENGTH_SET_NODE(node, statements);
7099 }
7100
7101 node->statements = statements;
7102}
7103
7107static pm_while_node_t *
7108pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7109 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7110
7111 return pm_while_node_new(
7112 parser->arena,
7113 ++parser->node_id,
7114 flags,
7115 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7116 TOK2LOC(parser, keyword),
7117 NTOK2LOC(parser, do_keyword),
7118 TOK2LOC(parser, closing),
7119 predicate,
7120 statements
7121 );
7122}
7123
7127static pm_while_node_t *
7128pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7129 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7130 pm_loop_modifier_block_exits(parser, statements);
7131
7132 return pm_while_node_new(
7133 parser->arena,
7134 ++parser->node_id,
7135 flags,
7136 PM_LOCATION_INIT_NODES(statements, predicate),
7137 TOK2LOC(parser, keyword),
7138 ((pm_location_t) { 0 }),
7139 ((pm_location_t) { 0 }),
7140 predicate,
7141 statements
7142 );
7143}
7144
7148static pm_while_node_t *
7149pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7150 return pm_while_node_new(
7151 parser->arena,
7152 ++parser->node_id,
7153 0,
7154 PM_LOCATION_INIT_UNSET,
7155 ((pm_location_t) { 0 }),
7156 ((pm_location_t) { 0 }),
7157 ((pm_location_t) { 0 }),
7158 predicate,
7159 statements
7160 );
7161}
7162
7167static pm_x_string_node_t *
7168pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7169 return pm_x_string_node_new(
7170 parser->arena,
7171 ++parser->node_id,
7172 PM_STRING_FLAGS_FROZEN,
7173 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
7174 TOK2LOC(parser, opening),
7175 TOK2LOC(parser, content),
7176 TOK2LOC(parser, closing),
7177 *unescaped
7178 );
7179}
7180
7184static PRISM_INLINE pm_x_string_node_t *
7185pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7186 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7187}
7188
7192static pm_yield_node_t *
7193pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7194 uint32_t start = PM_TOKEN_START(parser, keyword);
7195 uint32_t end;
7196
7197 if (rparen_loc->length > 0) {
7198 end = PM_LOCATION_END(rparen_loc);
7199 } else if (arguments != NULL) {
7200 end = PM_NODE_END(arguments);
7201 } else if (lparen_loc->length > 0) {
7202 end = PM_LOCATION_END(lparen_loc);
7203 } else {
7204 end = PM_TOKEN_END(parser, keyword);
7205 }
7206
7207 return pm_yield_node_new(
7208 parser->arena,
7209 ++parser->node_id,
7210 0,
7211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
7212 TOK2LOC(parser, keyword),
7213 *lparen_loc,
7214 arguments,
7215 *rparen_loc
7216 );
7217}
7218
7223static int
7224pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7225 pm_scope_t *scope = parser->current_scope;
7226 int depth = 0;
7227
7228 while (scope != NULL) {
7229 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7230 if (scope->closed) break;
7231
7232 scope = scope->previous;
7233 depth++;
7234 }
7235
7236 return -1;
7237}
7238
7244static PRISM_INLINE int
7245pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7246 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7247}
7248
7252static PRISM_INLINE void
7253pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7254 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
7255}
7256
7260static pm_constant_id_t
7261pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7262 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
7263 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7264 return constant_id;
7265}
7266
7270static PRISM_INLINE pm_constant_id_t
7271pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7272 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
7273}
7274
7278static PRISM_INLINE pm_constant_id_t
7279pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7280 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
7281}
7282
7286static pm_constant_id_t
7287pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7288 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7289 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7290 return constant_id;
7291}
7292
7296static pm_constant_id_t
7297pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7298 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7299 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7300 return constant_id;
7301}
7302
7310static bool
7311pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7312 // We want to check whether the parameter name is a numbered parameter or
7313 // not.
7314 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7315
7316 // Otherwise we'll fetch the constant id for the parameter name and check
7317 // whether it's already in the current scope.
7318 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7319
7320 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7321 // Add an error if the parameter doesn't start with _ and has been seen before
7322 if ((name->start < name->end) && (*name->start != '_')) {
7323 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7324 }
7325 return true;
7326 }
7327 return false;
7328}
7329
7333static void
7334pm_parser_scope_pop(pm_parser_t *parser) {
7335 pm_scope_t *scope = parser->current_scope;
7336 parser->current_scope = scope->previous;
7337 pm_locals_free(&scope->locals);
7338 xfree_sized(scope, sizeof(pm_scope_t));
7339}
7340
7341/******************************************************************************/
7342/* Stack helpers */
7343/******************************************************************************/
7344
7348static PRISM_INLINE void
7349pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7350 *stack = (*stack << 1) | (value & 1);
7351}
7352
7356static PRISM_INLINE void
7357pm_state_stack_pop(pm_state_stack_t *stack) {
7358 *stack >>= 1;
7359}
7360
7364static PRISM_INLINE bool
7365pm_state_stack_p(const pm_state_stack_t *stack) {
7366 return *stack & 1;
7367}
7368
7369static PRISM_INLINE void
7370pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7371 // Use the negation of the value to prevent stack overflow.
7372 pm_state_stack_push(&parser->accepts_block_stack, !value);
7373}
7374
7375static PRISM_INLINE void
7376pm_accepts_block_stack_pop(pm_parser_t *parser) {
7377 pm_state_stack_pop(&parser->accepts_block_stack);
7378}
7379
7380static PRISM_INLINE bool
7381pm_accepts_block_stack_p(pm_parser_t *parser) {
7382 return !pm_state_stack_p(&parser->accepts_block_stack);
7383}
7384
7385static PRISM_INLINE void
7386pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7387 pm_state_stack_push(&parser->do_loop_stack, value);
7388}
7389
7390static PRISM_INLINE void
7391pm_do_loop_stack_pop(pm_parser_t *parser) {
7392 pm_state_stack_pop(&parser->do_loop_stack);
7393}
7394
7395static PRISM_INLINE bool
7396pm_do_loop_stack_p(pm_parser_t *parser) {
7397 return pm_state_stack_p(&parser->do_loop_stack);
7398}
7399
7400/******************************************************************************/
7401/* Lexer check helpers */
7402/******************************************************************************/
7403
7408static PRISM_INLINE uint8_t
7409peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7410 if (cursor < parser->end) {
7411 return *cursor;
7412 } else {
7413 return '\0';
7414 }
7415}
7416
7422static PRISM_INLINE uint8_t
7423peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7424 return peek_at(parser, parser->current.end + offset);
7425}
7426
7431static PRISM_INLINE uint8_t
7432peek(const pm_parser_t *parser) {
7433 return peek_at(parser, parser->current.end);
7434}
7435
7440static PRISM_INLINE bool
7441match(pm_parser_t *parser, uint8_t value) {
7442 if (peek(parser) == value) {
7443 parser->current.end++;
7444 return true;
7445 }
7446 return false;
7447}
7448
7453static PRISM_INLINE size_t
7454match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7455 if (peek_at(parser, cursor) == '\n') {
7456 return 1;
7457 }
7458 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7459 return 2;
7460 }
7461 return 0;
7462}
7463
7469static PRISM_INLINE size_t
7470match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7471 return match_eol_at(parser, parser->current.end + offset);
7472}
7473
7479static PRISM_INLINE size_t
7480match_eol(pm_parser_t *parser) {
7481 return match_eol_at(parser, parser->current.end);
7482}
7483
7487static PRISM_INLINE const uint8_t *
7488next_newline(const uint8_t *cursor, ptrdiff_t length) {
7489 assert(length >= 0);
7490
7491 // Note that it's okay for us to use memchr here to look for \n because none
7492 // of the encodings that we support have \n as a component of a multi-byte
7493 // character.
7494 return memchr(cursor, '\n', (size_t) length);
7495}
7496
7500static PRISM_INLINE bool
7501ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7502 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7503}
7504
7509static bool
7510parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7511 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7512
7513 if (encoding != NULL) {
7514 if (parser->encoding != encoding) {
7515 parser->encoding = encoding;
7516 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7517 }
7518
7519 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7520 return true;
7521 }
7522
7523 return false;
7524}
7525
/**
 * Scan the current token for an encoding declaration: the word "coding"
 * (compared case-insensitively), followed by a ':' or '=' separator and an
 * encoding name. When a name is found it is handed to
 * parser_lex_magic_comment_encoding_value; if that fails, a
 * PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error is recorded over the name.
 *
 * The function returns silently whenever the token runs out before a complete
 * declaration has been seen.
 */
static void
parser_lex_magic_comment_encoding(pm_parser_t *parser) {
    // Start one byte past the beginning of the token (presumably skipping the
    // leading '#' of the comment -- confirm against the caller).
    const uint8_t *cursor = parser->current.start + 1;
    const uint8_t *end = parser->current.end;

    // Phase one: locate "coding". The candidate word is the 6 bytes starting
    // at cursor, and the byte examined is the one immediately after it.
    bool separator = false;
    while (true) {
        // Not enough bytes left for a 6-byte word plus a following byte.
        if (end - cursor <= 6) return;
        switch (cursor[6]) {
            // The following byte is itself a letter of "coding": slide the
            // window forward by the amount that would put that letter at its
            // position within the word, then look again.
            case 'C': case 'c': cursor += 6; continue;
            case 'O': case 'o': cursor += 5; continue;
            case 'D': case 'd': cursor += 4; continue;
            case 'I': case 'i': cursor += 3; continue;
            case 'N': case 'n': cursor += 2; continue;
            case 'G': case 'g': cursor += 1; continue;
            // The following byte is a separator: remember that, and fall out
            // of the switch to compare the 6 bytes before it.
            case '=': case ':':
                separator = true;
                cursor += 6;
                break;
            // Any other byte: only whitespace can legitimately follow the
            // word, so compare in that case and otherwise keep scanning.
            default:
                cursor += 6;
                if (pm_char_is_whitespace(*cursor)) break;
                continue;
        }
        // cursor now sits on the byte after the candidate word.
        if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
        separator = false;
    }

    // Phase two: skip whitespace and, if it has not been seen yet, require the
    // separator. Each pass pre-increments the cursor before testing it.
    while (true) {
        do {
            if (++cursor >= end) return;
        } while (pm_char_is_whitespace(*cursor));

        if (separator) break;
        if (*cursor != '=' && *cursor != ':') return;

        // NOTE(review): after this increment the next pass pre-increments
        // again, so the byte directly after a separator found here is skipped
        // without being examined. Presumably this mirrors the reference
        // implementation -- confirm before changing.
        separator = true;
        cursor++;
    }

    // Phase three: the value is the longest run of '-', '_', and bytes that
    // the parser's current encoding considers alphanumeric.
    const uint8_t *value_start = cursor;
    while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);

    if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
        // If we were unable to parse the encoding value, then we've got an
        // issue because we didn't understand the encoding that the user was
        // trying to use. In this case we'll keep using the default encoding but
        // add an error to the parser to indicate an unsuccessful parse.
        pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
    }
}
7581
/**
 * The possible results of interpreting the value of a magic comment as a
 * boolean.
 */
typedef enum {
    /** The value was recognized as true. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was recognized as false. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was not recognized as either boolean. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;
7587
7592static pm_magic_comment_boolean_value_t
7593parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7594 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7595 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7596 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7597 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7598 } else {
7599 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7600 }
7601}
7602
7603static PRISM_INLINE bool
7604pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
7605 return b == '\'' || b == '"' || b == ':' || b == ';';
7606}
7607
7613static PRISM_INLINE const uint8_t *
7614parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7615 // Scan for '*' as the middle character, since it is rarer than '-' in
7616 // typical comments and avoids repeated memchr calls for '-' that hit
7617 // dashes in words like "foo-bar".
7618 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7619 if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
7620 return cursor - 1;
7621 }
7622 }
7623 return NULL;
7624}
7625
7636static PRISM_INLINE bool
7637parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7638 bool result = true;
7639
7640 const uint8_t *start = parser->current.start + 1;
7641 const uint8_t *end = parser->current.end;
7642 if (end - start <= 7) return false;
7643
7644 const uint8_t *cursor;
7645 bool indicator = false;
7646
7647 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7648 start = cursor + 3;
7649
7650 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7651 end = cursor;
7652 indicator = true;
7653 } else {
7654 // If we have a start marker but not an end marker, then we cannot
7655 // have a magic comment.
7656 return false;
7657 }
7658 } else {
7659 // Non-emacs magic comments must contain a colon for `key: value`.
7660 // Reject early if there is no colon to avoid scanning the entire
7661 // comment character-by-character.
7662 if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
7663 return false;
7664 }
7665
7666 // Advance start past leading whitespace so the main loop begins
7667 // directly at the key, avoiding a redundant whitespace scan.
7668 start += pm_strspn_whitespace(start, end - start);
7669 }
7670
7671 cursor = start;
7672 while (cursor < end) {
7673 if (indicator) {
7674 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7675 }
7676
7677 const uint8_t *key_start = cursor;
7678 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7679
7680 const uint8_t *key_end = cursor;
7681 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7682 if (cursor == end) break;
7683
7684 if (*cursor == ':') {
7685 cursor++;
7686 } else {
7687 if (!indicator) return false;
7688 continue;
7689 }
7690
7691 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7692 if (cursor == end) break;
7693
7694 const uint8_t *value_start;
7695 const uint8_t *value_end;
7696
7697 if (*cursor == '"') {
7698 value_start = ++cursor;
7699 for (; cursor < end && *cursor != '"'; cursor++) {
7700 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7701 }
7702 value_end = cursor;
7703 if (cursor < end && *cursor == '"') cursor++;
7704 } else {
7705 value_start = cursor;
7706 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7707 value_end = cursor;
7708 }
7709
7710 if (indicator) {
7711 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7712 } else {
7713 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7714 if (cursor != end) return false;
7715 }
7716
7717 // Here, we need to do some processing on the key to swap out dashes for
7718 // underscores. We only need to do this if there _is_ a dash in the key.
7719 pm_string_t key;
7720 const size_t key_length = (size_t) (key_end - key_start);
7721 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7722
7723 if (dash == NULL) {
7724 pm_string_shared_init(&key, key_start, key_end);
7725 } else {
7726 uint8_t *buffer = xmalloc(key_length);
7727 if (buffer == NULL) break;
7728
7729 memcpy(buffer, key_start, key_length);
7730 buffer[dash - key_start] = '_';
7731
7732 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7733 buffer[dash - key_start] = '_';
7734 }
7735
7736 pm_string_owned_init(&key, buffer, key_length);
7737 }
7738
7739 // Finally, we can start checking the key against the list of known
7740 // magic comment keys, and potentially change state based on that.
7741 const uint8_t *key_source = pm_string_source(&key);
7742 uint32_t value_length = (uint32_t) (value_end - value_start);
7743
7744 // We only want to attempt to compare against encoding comments if it's
7745 // the first line in the file (or the second in the case of a shebang).
7746 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7747 if (
7748 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7749 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7750 ) {
7751 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7752 }
7753 }
7754
7755 if (key_length == 11) {
7756 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7757 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7758 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7759 PM_PARSER_WARN_TOKEN_FORMAT(
7760 parser,
7761 &parser->current,
7762 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7763 (int) key_length,
7764 (const char *) key_source,
7765 (int) value_length,
7766 (const char *) value_start
7767 );
7768 break;
7769 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7770 parser->warn_mismatched_indentation = false;
7771 break;
7772 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7773 parser->warn_mismatched_indentation = true;
7774 break;
7775 }
7776 }
7777 } else if (key_length == 21) {
7778 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7779 // We only want to handle frozen string literal comments if it's
7780 // before any semantic tokens have been seen.
7781 if (semantic_token_seen) {
7782 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7783 } else {
7784 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7785 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7786 PM_PARSER_WARN_TOKEN_FORMAT(
7787 parser,
7788 &parser->current,
7789 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7790 (int) key_length,
7791 (const char *) key_source,
7792 (int) value_length,
7793 (const char *) value_start
7794 );
7795 break;
7796 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7797 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
7798 break;
7799 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7800 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED;
7801 break;
7802 }
7803 }
7804 }
7805 } else if (key_length == 24) {
7806 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7807 const uint8_t *cursor = parser->current.start;
7808 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7809
7810 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7811 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7812 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7813 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7814 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7815 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7816 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7817 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7818 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7819 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7820 } else {
7821 PM_PARSER_WARN_TOKEN_FORMAT(
7822 parser,
7823 &parser->current,
7824 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7825 (int) key_length,
7826 (const char *) key_source,
7827 (int) value_length,
7828 (const char *) value_start
7829 );
7830 }
7831 }
7832 }
7833
7834 // When we're done, we want to free the string in case we had to
7835 // allocate memory for it.
7836 pm_string_cleanup(&key);
7837
7838 // Allocate a new magic comment node to append to the parser's list.
7839 pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
7840 magic_comment->node.next = NULL;
7841 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7842 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7843 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7844 }
7845
7846 return result;
7847}
7848
7849/******************************************************************************/
7850/* Context manipulations */
7851/******************************************************************************/
7852
7853static const uint32_t context_terminators[] = {
7854 [PM_CONTEXT_NONE] = 0,
7855 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7856 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7857 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7858 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7859 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7860 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7861 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7862 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7863 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7864 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7865 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7866 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7867 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7868 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7869 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7870 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7871 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7872 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7873 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7874 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7875 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7876 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7877 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7878 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7879 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7880 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7881 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7882 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7883 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7884 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7885 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7886 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7887 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7888 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7889 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7890 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7891 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7892 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7893 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7894 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7895 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7896 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7897 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7898 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7899 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7900 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7901 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7902 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7903 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7904 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7905 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7906 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7907 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7908 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7909};
7910
7911static PRISM_INLINE bool
7912context_terminator(pm_context_t context, pm_token_t *token) {
7913 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7914}
7915
7920static pm_context_t
7921context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7922 pm_context_node_t *context_node = parser->current_context;
7923
7924 while (context_node != NULL) {
7925 if (context_terminator(context_node->context, token)) return context_node->context;
7926 context_node = context_node->prev;
7927 }
7928
7929 return PM_CONTEXT_NONE;
7930}
7931
7932static bool
7933context_push(pm_parser_t *parser, pm_context_t context) {
7934 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7935 if (context_node == NULL) return false;
7936
7937 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7938
7939 if (parser->current_context == NULL) {
7940 parser->current_context = context_node;
7941 } else {
7942 context_node->prev = parser->current_context;
7943 parser->current_context = context_node;
7944 }
7945
7946 return true;
7947}
7948
7949static void
7950context_pop(pm_parser_t *parser) {
7951 pm_context_node_t *prev = parser->current_context->prev;
7952 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7953 parser->current_context = prev;
7954}
7955
7956static bool
7957context_p(const pm_parser_t *parser, pm_context_t context) {
7958 pm_context_node_t *context_node = parser->current_context;
7959
7960 while (context_node != NULL) {
7961 if (context_node->context == context) return true;
7962 context_node = context_node->prev;
7963 }
7964
7965 return false;
7966}
7967
7968static bool
7969context_def_p(const pm_parser_t *parser) {
7970 pm_context_node_t *context_node = parser->current_context;
7971
7972 while (context_node != NULL) {
7973 switch (context_node->context) {
7974 case PM_CONTEXT_DEF:
7975 case PM_CONTEXT_DEF_PARAMS:
7976 case PM_CONTEXT_DEF_ENSURE:
7977 case PM_CONTEXT_DEF_RESCUE:
7978 case PM_CONTEXT_DEF_ELSE:
7979 return true;
7980 case PM_CONTEXT_CLASS:
7981 case PM_CONTEXT_CLASS_ENSURE:
7982 case PM_CONTEXT_CLASS_RESCUE:
7983 case PM_CONTEXT_CLASS_ELSE:
7984 case PM_CONTEXT_MODULE:
7985 case PM_CONTEXT_MODULE_ENSURE:
7986 case PM_CONTEXT_MODULE_RESCUE:
7987 case PM_CONTEXT_MODULE_ELSE:
7988 case PM_CONTEXT_SCLASS:
7989 case PM_CONTEXT_SCLASS_ENSURE:
7990 case PM_CONTEXT_SCLASS_RESCUE:
7991 case PM_CONTEXT_SCLASS_ELSE:
7992 return false;
7993 default:
7994 context_node = context_node->prev;
7995 }
7996 }
7997
7998 return false;
7999}
8000
8005static const char *
8006context_human(pm_context_t context) {
8007 switch (context) {
8008 case PM_CONTEXT_NONE:
8009 assert(false && "unreachable");
8010 return "";
8011 case PM_CONTEXT_BEGIN: return "begin statement";
8012 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8013 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8014 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
8015 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8016 case PM_CONTEXT_CASE_IN: return "'in' clause";
8017 case PM_CONTEXT_CLASS: return "class definition";
8018 case PM_CONTEXT_DEF: return "method definition";
8019 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8020 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8021 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8022 case PM_CONTEXT_ELSE:
8023 case PM_CONTEXT_BEGIN_ELSE:
8024 case PM_CONTEXT_BLOCK_ELSE:
8025 case PM_CONTEXT_CLASS_ELSE:
8026 case PM_CONTEXT_DEF_ELSE:
8027 case PM_CONTEXT_LAMBDA_ELSE:
8028 case PM_CONTEXT_MODULE_ELSE:
8029 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8030 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8031 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8032 case PM_CONTEXT_BEGIN_ENSURE:
8033 case PM_CONTEXT_BLOCK_ENSURE:
8034 case PM_CONTEXT_CLASS_ENSURE:
8035 case PM_CONTEXT_DEF_ENSURE:
8036 case PM_CONTEXT_LAMBDA_ENSURE:
8037 case PM_CONTEXT_MODULE_ENSURE:
8038 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8039 case PM_CONTEXT_FOR: return "for loop";
8040 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8041 case PM_CONTEXT_IF: return "if statement";
8042 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8043 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8044 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8045 case PM_CONTEXT_MAIN: return "top level context";
8046 case PM_CONTEXT_MODULE: return "module definition";
8047 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8048 case PM_CONTEXT_PARENS: return "parentheses";
8049 case PM_CONTEXT_POSTEXE: return "'END' block";
8050 case PM_CONTEXT_PREDICATE: return "predicate";
8051 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8052 case PM_CONTEXT_BEGIN_RESCUE:
8053 case PM_CONTEXT_BLOCK_RESCUE:
8054 case PM_CONTEXT_CLASS_RESCUE:
8055 case PM_CONTEXT_DEF_RESCUE:
8056 case PM_CONTEXT_LAMBDA_RESCUE:
8057 case PM_CONTEXT_MODULE_RESCUE:
8058 case PM_CONTEXT_RESCUE_MODIFIER:
8059 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8060 case PM_CONTEXT_SCLASS: return "singleton class definition";
8061 case PM_CONTEXT_TERNARY: return "ternary expression";
8062 case PM_CONTEXT_UNLESS: return "unless statement";
8063 case PM_CONTEXT_UNTIL: return "until statement";
8064 case PM_CONTEXT_WHILE: return "while statement";
8065 }
8066
8067 assert(false && "unreachable");
8068 return "";
8069}
8070
8071/******************************************************************************/
8072/* Specific token lexers */
8073/******************************************************************************/
8074
8075static PRISM_INLINE void
8076pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8077 if (invalid != NULL) {
8078 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8079 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
8080 }
8081}
8082
8083static size_t
8084pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8085 const uint8_t *invalid = NULL;
8086 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8087 pm_strspn_number_validate(parser, string, length, invalid);
8088 return length;
8089}
8090
8091static size_t
8092pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8093 const uint8_t *invalid = NULL;
8094 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8095 pm_strspn_number_validate(parser, string, length, invalid);
8096 return length;
8097}
8098
8099static size_t
8100pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8101 const uint8_t *invalid = NULL;
8102 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8103 pm_strspn_number_validate(parser, string, length, invalid);
8104 return length;
8105}
8106
8107static size_t
8108pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8109 const uint8_t *invalid = NULL;
8110 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8111 pm_strspn_number_validate(parser, string, length, invalid);
8112 return length;
8113}
8114
8115static pm_token_type_t
8116lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8117 pm_token_type_t type = PM_TOKEN_INTEGER;
8118
8119 // Here we're going to attempt to parse the optional decimal portion of a
8120 // float. If it's not there, then it's okay and we'll just continue on.
8121 if (peek(parser) == '.') {
8122 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8123 parser->current.end += 2;
8124 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8125 type = PM_TOKEN_FLOAT;
8126 } else {
8127 // If we had a . and then something else, then it's not a float
8128 // suffix on a number it's a method call or something else.
8129 return type;
8130 }
8131 }
8132
8133 // Here we're going to attempt to parse the optional exponent portion of a
8134 // float. If it's not there, it's okay and we'll just continue on.
8135 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8136 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8137 parser->current.end += 2;
8138
8139 if (pm_char_is_decimal_digit(peek(parser))) {
8140 parser->current.end++;
8141 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8142 } else {
8143 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8144 }
8145 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8146 parser->current.end++;
8147 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8148 } else {
8149 return type;
8150 }
8151
8152 *seen_e = true;
8153 type = PM_TOKEN_FLOAT;
8154 }
8155
8156 return type;
8157}
8158
8159static pm_token_type_t
8160lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8161 pm_token_type_t type = PM_TOKEN_INTEGER;
8162 *seen_e = false;
8163
8164 if (peek_offset(parser, -1) == '0') {
8165 switch (*parser->current.end) {
8166 // 0d1111 is a decimal number
8167 case 'd':
8168 case 'D':
8169 parser->current.end++;
8170 if (pm_char_is_decimal_digit(peek(parser))) {
8171 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8172 } else {
8173 match(parser, '_');
8174 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8175 }
8176
8177 break;
8178
8179 // 0b1111 is a binary number
8180 case 'b':
8181 case 'B':
8182 parser->current.end++;
8183 if (pm_char_is_binary_digit(peek(parser))) {
8184 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8185 } else {
8186 match(parser, '_');
8187 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8188 }
8189
8190 parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
8191 break;
8192
8193 // 0o1111 is an octal number
8194 case 'o':
8195 case 'O':
8196 parser->current.end++;
8197 if (pm_char_is_octal_digit(peek(parser))) {
8198 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8199 } else {
8200 match(parser, '_');
8201 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8202 }
8203
8204 parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
8205 break;
8206
8207 // 01111 is an octal number
8208 case '_':
8209 case '0':
8210 case '1':
8211 case '2':
8212 case '3':
8213 case '4':
8214 case '5':
8215 case '6':
8216 case '7':
8217 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8218 parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
8219 break;
8220
8221 // 0x1111 is a hexadecimal number
8222 case 'x':
8223 case 'X':
8224 parser->current.end++;
8225 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8226 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8227 } else {
8228 match(parser, '_');
8229 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8230 }
8231
8232 parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8233 break;
8234
8235 // 0.xxx is a float
8236 case '.': {
8237 type = lex_optional_float_suffix(parser, seen_e);
8238 break;
8239 }
8240
8241 // 0exxx is a float
8242 case 'e':
8243 case 'E': {
8244 type = lex_optional_float_suffix(parser, seen_e);
8245 break;
8246 }
8247 }
8248 } else {
8249 // If it didn't start with a 0, then we'll lex as far as we can into a
8250 // decimal number. We compute the integer value inline to avoid
8251 // re-scanning the digits later in pm_integer_parse.
8252 {
8253 const uint8_t *cursor = parser->current.end;
8254 const uint8_t *end = parser->end;
8255 uint64_t value = (uint64_t) (cursor[-1] - '0');
8256
8257 bool has_underscore = false;
8258 bool prev_underscore = false;
8259 const uint8_t *invalid = NULL;
8260
8261 while (cursor < end) {
8262 uint8_t c = *cursor;
8263 if (c >= '0' && c <= '9') {
8264 if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
8265 prev_underscore = false;
8266 cursor++;
8267 } else if (c == '_') {
8268 has_underscore = true;
8269 if (prev_underscore && invalid == NULL) invalid = cursor;
8270 prev_underscore = true;
8271 cursor++;
8272 } else {
8273 break;
8274 }
8275 }
8276
8277 if (has_underscore) {
8278 if (prev_underscore && invalid == NULL) invalid = cursor - 1;
8279 pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
8280 }
8281
8282 if (value <= UINT32_MAX) {
8283 parser->integer.value = (uint32_t) value;
8284 parser->integer.lexed = true;
8285 }
8286
8287 parser->current.end = cursor;
8288 }
8289
8290 // Afterward, we'll lex as far as we can into an optional float suffix.
8291 // Guard the function call: the vast majority of decimal numbers are
8292 // plain integers, so avoid the call when the next byte cannot start a
8293 // float suffix.
8294 {
8295 uint8_t next = peek(parser);
8296 if (next == '.' || next == 'e' || next == 'E') {
8297 type = lex_optional_float_suffix(parser, seen_e);
8298
8299 // If it turned out to be a float, the cached integer value is
8300 // invalid.
8301 if (type != PM_TOKEN_INTEGER) {
8302 parser->integer.lexed = false;
8303 }
8304 }
8305 }
8306 }
8307
8308 // At this point we have a completed number, but we want to provide the user
8309 // with a good experience if they put an additional .xxx fractional
8310 // component on the end, so we'll check for that here.
8311 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8312 const uint8_t *fraction_start = parser->current.end;
8313 const uint8_t *fraction_end = parser->current.end + 2;
8314 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8315 pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
8316 }
8317
8318 return type;
8319}
8320
8321static pm_token_type_t
8322lex_numeric(pm_parser_t *parser) {
8323 pm_token_type_t type = PM_TOKEN_INTEGER;
8324 parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8325 parser->integer.lexed = false;
8326
8327 if (parser->current.end < parser->end) {
8328 bool seen_e = false;
8329 type = lex_numeric_prefix(parser, &seen_e);
8330
8331 const uint8_t *end = parser->current.end;
8332 pm_token_type_t suffix_type = type;
8333
8334 if (type == PM_TOKEN_INTEGER) {
8335 if (match(parser, 'r')) {
8336 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8337
8338 if (match(parser, 'i')) {
8339 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8340 }
8341 } else if (match(parser, 'i')) {
8342 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8343 }
8344 } else {
8345 if (!seen_e && match(parser, 'r')) {
8346 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8347
8348 if (match(parser, 'i')) {
8349 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8350 }
8351 } else if (match(parser, 'i')) {
8352 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8353 }
8354 }
8355
8356 const uint8_t b = peek(parser);
8357 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8358 parser->current.end = end;
8359 } else {
8360 type = suffix_type;
8361 }
8362 }
8363
8364 return type;
8365}
8366
8367static pm_token_type_t
8368lex_global_variable(pm_parser_t *parser) {
8369 if (parser->current.end >= parser->end) {
8370 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8371 return PM_TOKEN_GLOBAL_VARIABLE;
8372 }
8373
8374 // True if multiple characters are allowed after the declaration of the
8375 // global variable. Not true when it starts with "$-".
8376 bool allow_multiple = true;
8377
8378 switch (*parser->current.end) {
8379 case '~': // $~: match-data
8380 case '*': // $*: argv
8381 case '$': // $$: pid
8382 case '?': // $?: last status
8383 case '!': // $!: error string
8384 case '@': // $@: error position
8385 case '/': // $/: input record separator
8386 case '\\': // $\: output record separator
8387 case ';': // $;: field separator
8388 case ',': // $,: output field separator
8389 case '.': // $.: last read line number
8390 case '=': // $=: ignorecase
8391 case ':': // $:: load path
8392 case '<': // $<: reading filename
8393 case '>': // $>: default output handle
8394 case '\"': // $": already loaded files
8395 parser->current.end++;
8396 return PM_TOKEN_GLOBAL_VARIABLE;
8397
8398 case '&': // $&: last match
8399 case '`': // $`: string before last match
8400 case '\'': // $': string after last match
8401 case '+': // $+: string matches last paren.
8402 parser->current.end++;
8403 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8404
8405 case '0': {
8406 parser->current.end++;
8407 size_t width;
8408
8409 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8410 do {
8411 parser->current.end += width;
8412 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8413
8414 // $0 isn't allowed to be followed by anything.
8415 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8416 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8417 }
8418
8419 return PM_TOKEN_GLOBAL_VARIABLE;
8420 }
8421
8422 case '1':
8423 case '2':
8424 case '3':
8425 case '4':
8426 case '5':
8427 case '6':
8428 case '7':
8429 case '8':
8430 case '9':
8431 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8432 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8433
8434 case '-':
8435 parser->current.end++;
8436 allow_multiple = false;
8438 default: {
8439 size_t width;
8440
8441 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8442 do {
8443 parser->current.end += width;
8444 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8445 } else if (pm_char_is_whitespace(peek(parser))) {
8446 // If we get here, then we have a $ followed by whitespace,
8447 // which is not allowed.
8448 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8449 } else {
8450 // If we get here, then we have a $ followed by something that
8451 // isn't recognized as a global variable.
8452 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8453 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8454 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8455 }
8456
8457 return PM_TOKEN_GLOBAL_VARIABLE;
8458 }
8459 }
8460}
8461
8474static PRISM_INLINE pm_token_type_t
8475lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8476 if (memcmp(current_start, value, vlen) == 0) {
8477 pm_lex_state_t last_state = parser->lex_state;
8478
8479 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8480 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8481 } else {
8482 lex_state_set(parser, state);
8483 if (state == PM_LEX_STATE_BEG) {
8484 parser->command_start = true;
8485 }
8486
8487 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8488 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8489 return modifier_type;
8490 }
8491 }
8492
8493 return type;
8494 }
8495
8496 return PM_TOKEN_EOF;
8497}
8498
8499static pm_token_type_t
8500lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8501 // Lex as far as we can into the current identifier.
8502 size_t width;
8503 const uint8_t *end = parser->end;
8504 const uint8_t *current_start = parser->current.start;
8505 const uint8_t *current_end = parser->current.end;
8506 bool encoding_changed = parser->encoding_changed;
8507
8508 if (encoding_changed) {
8509 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8510 current_end += width;
8511 }
8512 } else {
8513 // Fast path: scan ASCII identifier bytes using wide operations.
8514 current_end += scan_identifier_ascii(current_end, end);
8515
8516 // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
8517 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8518 current_end += width;
8519 }
8520 }
8521 parser->current.end = current_end;
8522
8523 // Now cache the length of the identifier so that we can quickly compare it
8524 // against known keywords.
8525 width = (size_t) (current_end - current_start);
8526
8527 if (current_end < end) {
8528 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8529 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8530 // check if we're returning the defined? keyword or just an identifier.
8531 width++;
8532
8533 if (
8534 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8535 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8536 ) {
8537 // If we're in a position where we can accept a : at the end of an
8538 // identifier, then we'll optionally accept it.
8539 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8540 (void) match(parser, ':');
8541 return PM_TOKEN_LABEL;
8542 }
8543
8544 if (parser->lex_state != PM_LEX_STATE_DOT) {
8545 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8546 return PM_TOKEN_KEYWORD_DEFINED;
8547 }
8548 }
8549
8550 return PM_TOKEN_METHOD_NAME;
8551 }
8552
8553 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8554 // If we're in a position where we can accept a = at the end of an
8555 // identifier, then we'll optionally accept it.
8556 return PM_TOKEN_IDENTIFIER;
8557 }
8558
8559 if (
8560 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8561 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8562 ) {
8563 // If we're in a position where we can accept a : at the end of an
8564 // identifier, then we'll optionally accept it.
8565 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8566 (void) match(parser, ':');
8567 return PM_TOKEN_LABEL;
8568 }
8569 }
8570
8571 if (parser->lex_state != PM_LEX_STATE_DOT) {
8572 pm_token_type_t type;
8573 switch (width) {
8574 case 2:
8575 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8576 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8577 return PM_TOKEN_KEYWORD_DO;
8578 }
8579 if (pm_do_loop_stack_p(parser)) {
8580 return PM_TOKEN_KEYWORD_DO_LOOP;
8581 }
8582 if (!pm_accepts_block_stack_p(parser)) {
8583 return PM_TOKEN_KEYWORD_DO_BLOCK;
8584 }
8585 return PM_TOKEN_KEYWORD_DO;
8586 }
8587
8588 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8589 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8590 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8591 break;
8592 case 3:
8593 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8594 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8595 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8596 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8597 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8598 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8599 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8600 break;
8601 case 4:
8602 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8603 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8604 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8605 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8606 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8607 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8608 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8609 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8610 break;
8611 case 5:
8612 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8613 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8614 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8615 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8616 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8617 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8618 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8619 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8620 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8621 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8622 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8623 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8624 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8625 break;
8626 case 6:
8627 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8628 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8629 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8630 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8631 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8632 break;
8633 case 8:
8634 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8635 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8636 break;
8637 case 12:
8638 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8639 break;
8640 }
8641 }
8642
8643 if (encoding_changed) {
8644 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8645 }
8646 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8647}
8648
8653static bool
8654current_token_starts_line(pm_parser_t *parser) {
8655 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8656}
8657
8672static pm_token_type_t
8673lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8674 // If there is no content following this #, then we're at the end of
8675 // the string and we can safely return string content.
8676 if (pound + 1 >= parser->end) {
8677 parser->current.end = pound + 1;
8678 return PM_TOKEN_STRING_CONTENT;
8679 }
8680
8681 // Now we'll check against the character that follows the #. If it
8682 // constitutes valid interplation, we'll handle that, otherwise we'll return
8683 // 0.
8684 switch (pound[1]) {
8685 case '@': {
8686 // In this case we may have hit an embedded instance or class variable.
8687 if (pound + 2 >= parser->end) {
8688 parser->current.end = pound + 1;
8689 return PM_TOKEN_STRING_CONTENT;
8690 }
8691
8692 // If we're looking at a @ and there's another @, then we'll skip past the
8693 // second @.
8694 const uint8_t *variable = pound + 2;
8695 if (*variable == '@' && pound + 3 < parser->end) variable++;
8696
8697 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8698 // At this point we're sure that we've either hit an embedded instance
8699 // or class variable. In this case we'll first need to check if we've
8700 // already consumed content.
8701 if (pound > parser->current.start) {
8702 parser->current.end = pound;
8703 return PM_TOKEN_STRING_CONTENT;
8704 }
8705
8706 // Otherwise we need to return the embedded variable token
8707 // and then switch to the embedded variable lex mode.
8708 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8709 parser->current.end = pound + 1;
8710 return PM_TOKEN_EMBVAR;
8711 }
8712
8713 // If we didn't get a valid interpolation, then this is just regular
8714 // string content. This is like if we get "#@-". In this case the caller
8715 // should keep lexing.
8716 parser->current.end = pound + 1;
8717 return 0;
8718 }
8719 case '$':
8720 // In this case we may have hit an embedded global variable. If there's
8721 // not enough room, then we'll just return string content.
8722 if (pound + 2 >= parser->end) {
8723 parser->current.end = pound + 1;
8724 return PM_TOKEN_STRING_CONTENT;
8725 }
8726
8727 // This is the character that we're going to check to see if it is the
8728 // start of an identifier that would indicate that this is a global
8729 // variable.
8730 const uint8_t *check = pound + 2;
8731
8732 if (pound[2] == '-') {
8733 if (pound + 3 >= parser->end) {
8734 parser->current.end = pound + 2;
8735 return PM_TOKEN_STRING_CONTENT;
8736 }
8737
8738 check++;
8739 }
8740
8741 // If the character that we're going to check is the start of an
8742 // identifier, or we don't have a - and the character is a decimal number
8743 // or a global name punctuation character, then we've hit an embedded
8744 // global variable.
8745 if (
8746 char_is_identifier_start(parser, check, parser->end - check) ||
8747 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8748 ) {
8749 // In this case we've hit an embedded global variable. First check to
8750 // see if we've already consumed content. If we have, then we need to
8751 // return that content as string content first.
8752 if (pound > parser->current.start) {
8753 parser->current.end = pound;
8754 return PM_TOKEN_STRING_CONTENT;
8755 }
8756
8757 // Otherwise, we need to return the embedded variable token and switch
8758 // to the embedded variable lex mode.
8759 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8760 parser->current.end = pound + 1;
8761 return PM_TOKEN_EMBVAR;
8762 }
8763
8764 // In this case we've hit a #$ that does not indicate a global variable.
8765 // In this case we'll continue lexing past it.
8766 parser->current.end = pound + 1;
8767 return 0;
8768 case '{':
8769 // In this case it's the start of an embedded expression. If we have
8770 // already consumed content, then we need to return that content as string
8771 // content first.
8772 if (pound > parser->current.start) {
8773 parser->current.end = pound;
8774 return PM_TOKEN_STRING_CONTENT;
8775 }
8776
8777 parser->enclosure_nesting++;
8778
8779 // Otherwise we'll skip past the #{ and begin lexing the embedded
8780 // expression.
8781 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8782 parser->current.end = pound + 2;
8783 parser->command_start = true;
8784 pm_do_loop_stack_push(parser, false);
8785 return PM_TOKEN_EMBEXPR_BEGIN;
8786 default:
8787 // In this case we've hit a # that doesn't constitute interpolation. We'll
8788 // mark that by returning the not provided token type. This tells the
8789 // consumer to keep lexing forward.
8790 parser->current.end = pound + 1;
8791 return 0;
8792 }
8793}
8794
/** No modifier is in effect for the escape sequence being read. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** The escape sequence is the target of a control escape (\c or \C-). */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1u << 0;

/** The escape sequence is the target of a meta escape (\M-). */
static const uint8_t PM_ESCAPE_FLAG_META = 1u << 1;

/** The escape sequence is being read for a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1u << 2;

/** The escape sequence is being read inside a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1u << 3;
8800
/**
 * Lookup table, indexed by ASCII byte value, of the characters accepted by
 * char_is_ascii_printable. The set entries are the whitespace controls 0x09
 * through 0x0D and the range 0x20 through 0x7E with the exception of the
 * backslash (0x5C).
 */
static const bool ascii_printable_chars[128] = {
    /* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 - 0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 - 0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 - 0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
8814
8815static PRISM_INLINE bool
8816char_is_ascii_printable(const uint8_t b) {
8817 return (b < 0x80) && ascii_printable_chars[b];
8818}
8819
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * input is expected to already be a valid hexadecimal digit ('0'-'9', 'a'-'f'
 * or 'A'-'F'); no validation happens here.
 */
static PRISM_INLINE uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both letter cases the low three bits hold 1 through 6 for a through
    // f, so adding 9 produces 10 through 15.
    return (uint8_t) ((value & 0x7) + 9);
}
8828
8834static PRISM_INLINE uint32_t
8835escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
8836 uint32_t value = 0;
8837 for (size_t index = 0; index < length; index++) {
8838 if (index != 0) value <<= 4;
8839 value |= escape_hexadecimal_digit(string[index]);
8840 }
8841
8842 // Here we're going to verify that the value is actually a valid Unicode
8843 // codepoint and not a surrogate pair.
8844 if (value >= 0xD800 && value <= 0xDFFF) {
8845 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8846 // In regexp context, defer the error to regexp encoding
8847 // validation where we can produce a regexp-specific message.
8848 } else if (error_location != NULL) {
8849 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8850 } else {
8851 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8852 }
8853 return 0xFFFD;
8854 }
8855
8856 return value;
8857}
8858
8862static PRISM_INLINE uint8_t
8863escape_byte(uint8_t value, const uint8_t flags) {
8864 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8865 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8866 return value;
8867}
8868
8872static PRISM_INLINE void
8873escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8874 // \u escape sequences in string-like structures implicitly change the
8875 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8876 // literal.
8877 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8878 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8879 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8880 // In regexp context, suppress this error — the regexp encoding
8881 // validation will produce a more specific error message.
8882 } else {
8883 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8884 }
8885 }
8886
8887 parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
8888 }
8889
8890 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8891 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8892 // In regexp context, defer the error to the regexp encoding
8893 // validation which produces a regexp-specific message.
8894 } else {
8895 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8896 }
8897
8898 pm_buffer_append_byte(buffer, 0xEF);
8899 pm_buffer_append_byte(buffer, 0xBF);
8900 pm_buffer_append_byte(buffer, 0xBD);
8901 }
8902}
8903
8908static PRISM_INLINE void
8909escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
8910 if (byte >= 0x80) {
8911 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8912 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8913 // In regexp context, suppress this error — the regexp encoding
8914 // validation will produce a more specific error message.
8915 } else {
8916 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8917 }
8918 }
8919
8920 parser->explicit_encoding = parser->encoding;
8921 }
8922
8923 pm_buffer_append_byte(buffer, byte);
8924}
8925
8941static PRISM_INLINE void
8942escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8945 }
8946
8947 escape_write_byte_encoded(parser, buffer, flags, byte);
8948}
8949
8953static PRISM_INLINE void
8954escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8955 size_t width;
8956 if (parser->encoding_changed) {
8957 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8958 } else {
8959 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8960 }
8961
8962 if (width == 1) {
8963 if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8964 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8965 } else if (width > 1) {
8966 // Valid multibyte character. Just ignore escape.
8967 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8968 pm_buffer_append_bytes(b, parser->current.end, width);
8969 parser->current.end += width;
8970 } else {
8971 // Assume the next character wasn't meant to be part of this escape
8972 // sequence since it is invalid. Add an error and move on.
8973 parser->current.end++;
8974 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8975 }
8976}
8977
8983static void
8984escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8985#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8986
8987 PM_PARSER_WARN_TOKEN_FORMAT(
8988 parser,
8989 &parser->current,
8990 PM_WARN_INVALID_CHARACTER,
8991 FLAG(flags),
8992 FLAG(flag),
8993 type
8994 );
8995
8996#undef FLAG
8997}
8998
9002static void
9003escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9004 uint8_t peeked = peek(parser);
9005 switch (peeked) {
9006 case '\\': {
9007 parser->current.end++;
9008 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9009 return;
9010 }
9011 case '\'': {
9012 parser->current.end++;
9013 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9014 return;
9015 }
9016 case 'a': {
9017 parser->current.end++;
9018 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9019 return;
9020 }
9021 case 'b': {
9022 parser->current.end++;
9023 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9024 return;
9025 }
9026 case 'e': {
9027 parser->current.end++;
9028 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9029 return;
9030 }
9031 case 'f': {
9032 parser->current.end++;
9033 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9034 return;
9035 }
9036 case 'n': {
9037 parser->current.end++;
9038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9039 return;
9040 }
9041 case 'r': {
9042 parser->current.end++;
9043 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9044 return;
9045 }
9046 case 's': {
9047 parser->current.end++;
9048 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9049 return;
9050 }
9051 case 't': {
9052 parser->current.end++;
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9054 return;
9055 }
9056 case 'v': {
9057 parser->current.end++;
9058 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9059 return;
9060 }
9061 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9062 uint8_t value = (uint8_t) (*parser->current.end - '0');
9063 parser->current.end++;
9064
9065 if (pm_char_is_octal_digit(peek(parser))) {
9066 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9067 parser->current.end++;
9068
9069 if (pm_char_is_octal_digit(peek(parser))) {
9070 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9071 parser->current.end++;
9072 }
9073 }
9074
9075 value = escape_byte(value, flags);
9076 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9077 return;
9078 }
9079 case 'x': {
9080 const uint8_t *start = parser->current.end - 1;
9081
9082 parser->current.end++;
9083 uint8_t byte = peek(parser);
9084
9085 if (pm_char_is_hexadecimal_digit(byte)) {
9086 uint8_t value = escape_hexadecimal_digit(byte);
9087 parser->current.end++;
9088
9089 byte = peek(parser);
9090 if (pm_char_is_hexadecimal_digit(byte)) {
9091 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9092 parser->current.end++;
9093 }
9094
9095 value = escape_byte(value, flags);
9096 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9097 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9098 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9099 } else {
9100 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9101 }
9102 }
9103
9104 escape_write_byte_encoded(parser, buffer, flags, value);
9105 } else {
9106 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9107 }
9108
9109 return;
9110 }
9111 case 'u': {
9112 const uint8_t *start = parser->current.end - 1;
9113 parser->current.end++;
9114
9115 if (parser->current.end == parser->end) {
9116 const uint8_t *start = parser->current.end - 2;
9117 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9118 } else if (peek(parser) == '{') {
9119 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9120 parser->current.end++;
9121
9122 size_t whitespace;
9123 while (true) {
9124 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9125 parser->current.end += whitespace;
9126 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9127 // This is super hacky, but it gets us nicer error
9128 // messages because we can still pass it off to the
9129 // regular expression engine even if we hit an
9130 // unterminated regular expression.
9131 parser->current.end += 2;
9132 } else {
9133 break;
9134 }
9135 }
9136
9137 const uint8_t *extra_codepoints_start = NULL;
9138 int codepoints_count = 0;
9139
9140 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9141 const uint8_t *unicode_start = parser->current.end;
9142 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9143
9144 if (hexadecimal_length > 6) {
9145 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9146 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9147 } else if (hexadecimal_length == 0) {
9148 // there are not hexadecimal characters
9149
9150 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9151 // If this is a regular expression, we are going to
9152 // let the regular expression engine handle this
9153 // error instead of us because we don't know at this
9154 // point if we're inside a comment in /x mode.
9155 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9156 } else {
9157 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
9158 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9159 }
9160
9161 return;
9162 }
9163
9164 parser->current.end += hexadecimal_length;
9165 codepoints_count++;
9166 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9167 extra_codepoints_start = unicode_start;
9168 }
9169
9170 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
9171 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9172
9173 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9174 }
9175
9176 // ?\u{nnnn} character literal should contain only one codepoint
9177 // and cannot be like ?\u{nnnn mmmm}.
9178 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9179 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9180 }
9181
9182 if (parser->current.end == parser->end) {
9183 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9184 } else if (peek(parser) == '}') {
9185 parser->current.end++;
9186 } else {
9187 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9188 // If this is a regular expression, we are going to let
9189 // the regular expression engine handle this error
9190 // instead of us because we don't know at this point if
9191 // we're inside a comment in /x mode.
9192 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9193 } else {
9194 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9195 }
9196 }
9197
9198 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9199 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9200 }
9201 } else {
9202 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9203
9204 if (length == 0) {
9205 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9206 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9207 } else {
9208 const uint8_t *start = parser->current.end - 2;
9209 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9210 }
9211 } else if (length == 4) {
9212 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
9213
9214 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9215 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9216 }
9217
9218 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9219 parser->current.end += 4;
9220 } else {
9221 parser->current.end += length;
9222
9223 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9224 // If this is a regular expression, we are going to let
9225 // the regular expression engine handle this error
9226 // instead of us.
9227 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9228 } else {
9229 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9230 }
9231 }
9232 }
9233
9234 return;
9235 }
9236 case 'c': {
9237 parser->current.end++;
9238 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9239 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9240 }
9241
9242 if (parser->current.end == parser->end) {
9243 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9244 return;
9245 }
9246
9247 uint8_t peeked = peek(parser);
9248 switch (peeked) {
9249 case '?': {
9250 parser->current.end++;
9251 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9252 return;
9253 }
9254 case '\\':
9255 parser->current.end++;
9256
9257 if (match(parser, 'u') || match(parser, 'U')) {
9258 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9259 return;
9260 }
9261
9262 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9263 return;
9264 case ' ':
9265 parser->current.end++;
9266 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9267 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9268 return;
9269 case '\t':
9270 parser->current.end++;
9271 escape_read_warn(parser, flags, 0, "\\t");
9272 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9273 return;
9274 default: {
9275 if (!char_is_ascii_printable(peeked)) {
9276 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9277 return;
9278 }
9279
9280 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9281 parser->current.end++;
9282 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9283 return;
9284 }
9285 }
9286 }
9287 case 'C': {
9288 parser->current.end++;
9289 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9290 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9291 }
9292
9293 if (peek(parser) != '-') {
9294 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9295 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9296 return;
9297 }
9298
9299 parser->current.end++;
9300 if (parser->current.end == parser->end) {
9301 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9302 return;
9303 }
9304
9305 uint8_t peeked = peek(parser);
9306 switch (peeked) {
9307 case '?': {
9308 parser->current.end++;
9309 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9310 return;
9311 }
9312 case '\\':
9313 parser->current.end++;
9314
9315 if (match(parser, 'u') || match(parser, 'U')) {
9316 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9317 return;
9318 }
9319
9320 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9321 return;
9322 case ' ':
9323 parser->current.end++;
9324 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9325 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9326 return;
9327 case '\t':
9328 parser->current.end++;
9329 escape_read_warn(parser, flags, 0, "\\t");
9330 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9331 return;
9332 default: {
9333 if (!char_is_ascii_printable(peeked)) {
9334 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9335 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9336 return;
9337 }
9338
9339 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9340 parser->current.end++;
9341 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9342 return;
9343 }
9344 }
9345 }
9346 case 'M': {
9347 parser->current.end++;
9348 if (flags & PM_ESCAPE_FLAG_META) {
9349 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9350 }
9351
9352 if (peek(parser) != '-') {
9353 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9354 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9355 return;
9356 }
9357
9358 parser->current.end++;
9359 if (parser->current.end == parser->end) {
9360 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9361 return;
9362 }
9363
9364 uint8_t peeked = peek(parser);
9365 switch (peeked) {
9366 case '\\':
9367 parser->current.end++;
9368
9369 if (match(parser, 'u') || match(parser, 'U')) {
9370 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9371 return;
9372 }
9373
9374 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9375 return;
9376 case ' ':
9377 parser->current.end++;
9378 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9379 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9380 return;
9381 case '\t':
9382 parser->current.end++;
9383 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9384 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9385 return;
9386 default:
9387 if (!char_is_ascii_printable(peeked)) {
9388 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9389 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9390 return;
9391 }
9392
9393 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9394 parser->current.end++;
9395 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9396 return;
9397 }
9398 }
9399 case '\r': {
9400 if (peek_offset(parser, 1) == '\n') {
9401 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
9402 parser->current.end += 2;
9403 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
9404 return;
9405 }
9407 }
9408 default: {
9409 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9410 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9411 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9412 return;
9413 }
9414 if (parser->current.end < parser->end) {
9415 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9416 } else {
9417 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9418 }
9419 return;
9420 }
9421 }
9422}
9423
/**
 * Lex the token introduced by a `?` character and return its type, which is
 * either PM_TOKEN_QUESTION_MARK or PM_TOKEN_CHARACTER_LITERAL. Whenever a
 * character literal is returned, parser->current_string is set to its
 * contents. The lexer state is updated along the way.
 *
 * NOTE(review): the code reads as if parser->current.start points at the `?`
 * and parser->current.end just past it on entry -- confirm against the caller.
 */
9449static pm_token_type_t
9450lex_question_mark(pm_parser_t *parser) {
    // If lex_state_end_p() reports an "end" state, the `?` is returned as a
    // plain question mark token and the state moves to BEG.
9451 if (lex_state_end_p(parser)) {
9452 lex_state_set(parser, PM_LEX_STATE_BEG);
9453 return PM_TOKEN_QUESTION_MARK;
9454 }
9455
    // Nothing follows the `?`: report an incomplete literal but still return a
    // character literal token whose string begins one byte after
    // current.start.
9456 if (parser->current.end >= parser->end) {
9457 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9458 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9459 return PM_TOKEN_CHARACTER_LITERAL;
9460 }
9461
    // A `?` directly followed by whitespace is a plain question mark.
9462 if (pm_char_is_whitespace(*parser->current.end)) {
9463 lex_state_set(parser, PM_LEX_STATE_BEG);
9464 return PM_TOKEN_QUESTION_MARK;
9465 }
9466
    // Default state for the remaining paths; the character literal branches
    // below override it with END.
9467 lex_state_set(parser, PM_LEX_STATE_BEG);
9468
    // `?\...`: an escaped character literal. match() consumes the backslash
    // when present, and escape_read() writes the escape's bytes into a
    // temporary buffer.
9469 if (match(parser, '\\')) {
9470 lex_state_set(parser, PM_LEX_STATE_END);
9471
9472 pm_buffer_t buffer;
9473 pm_buffer_init(&buffer, 3);
9474
9475 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9476
9477 // Copy buffer data into the arena and free the heap buffer.
9478 void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
9479 pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
9480 pm_buffer_cleanup(&buffer);
9481
9482 return PM_TOKEN_CHARACTER_LITERAL;
9483 } else {
    // Width in bytes of the character after the `?` in the source encoding.
9484 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9485
9486 // Ternary operators can have a ? immediately followed by an identifier
9487 // which starts with an underscore. We check for this case here.
    // The literal is accepted when the next character is neither alphanumeric
    // nor `_`, or when that character is the last one in the input, or when
    // the character after it is not an identifier character.
9488 if (
9489 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9490 (
9491 (parser->current.end + encoding_width >= parser->end) ||
9492 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9493 )
9494 ) {
9495 lex_state_set(parser, PM_LEX_STATE_END);
9496 parser->current.end += encoding_width;
9497 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9498 return PM_TOKEN_CHARACTER_LITERAL;
9499 }
9500 }
9501
    // Otherwise the `?` is followed by what looks like a multi-character
    // identifier, so it is returned as a plain question mark.
9502 return PM_TOKEN_QUESTION_MARK;
9503}
9504
9509static pm_token_type_t
9510lex_at_variable(pm_parser_t *parser) {
9511 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9512 const uint8_t *end = parser->end;
9513
9514 size_t width;
9515 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9516 parser->current.end += width;
9517
9518 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9519 parser->current.end += width;
9520 }
9521 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9522 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9523 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9524 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9525 }
9526
9527 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9528 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9529 } else {
9530 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9531 pm_parser_err_token(parser, &parser->current, diag_id);
9532 }
9533
9534 // If we're lexing an embedded variable, then we need to pop back into the
9535 // parent lex context.
9536 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9537 lex_mode_pop(parser);
9538 }
9539
9540 return type;
9541}
9542
9546static PRISM_INLINE void
9547parser_lex_callback(pm_parser_t *parser) {
9548 if (parser->lex_callback.callback) {
9549 parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
9550 }
9551}
9552
/**
 * Allocate a pm_comment_t from the parser's metadata arena and initialize it
 * with the given type and the location of the current token. The new comment
 * is returned to the caller; this function does not append it to any list.
 *
 * NOTE(review): the declaration-specifier line (storage class and return type)
 * that should precede this line is not present in this listing -- confirm
 * against the original source.
 */
9557parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9558 pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
9559
    // Assigning a compound literal overwrites the whole struct, so any member
    // not named here is zero-initialized.
9560 *comment = (pm_comment_t) {
9561 .type = type,
9562 .location = TOK2LOC(parser, &parser->current)
9563 };
9564
9565 return comment;
9566}
9567
9573static pm_token_type_t
9574lex_embdoc(pm_parser_t *parser) {
9575 // First, lex out the EMBDOC_BEGIN token.
9576 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9577
9578 if (newline == NULL) {
9579 parser->current.end = parser->end;
9580 } else {
9581 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9582 parser->current.end = newline + 1;
9583 }
9584
9585 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9586 parser_lex_callback(parser);
9587
9588 // Now, create a comment that is going to be attached to the parser.
9589 const uint8_t *comment_start = parser->current.start;
9590 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9591
9592 // Now, loop until we find the end of the embedded documentation or the end
9593 // of the file.
9594 while (parser->current.end + 4 <= parser->end) {
9595 parser->current.start = parser->current.end;
9596
9597 // If we've hit the end of the embedded documentation then we'll return
9598 // that token here.
9599 if (
9600 (memcmp(parser->current.end, "=end", 4) == 0) &&
9601 (
9602 (parser->current.end + 4 == parser->end) || // end of file
9603 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9604 (parser->current.end[4] == '\0') || // NUL or end of script
9605 (parser->current.end[4] == '\004') || // ^D
9606 (parser->current.end[4] == '\032') // ^Z
9607 )
9608 ) {
9609 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9610
9611 if (newline == NULL) {
9612 parser->current.end = parser->end;
9613 } else {
9614 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9615 parser->current.end = newline + 1;
9616 }
9617
9618 parser->current.type = PM_TOKEN_EMBDOC_END;
9619 parser_lex_callback(parser);
9620
9621 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9622 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9623
9624 return PM_TOKEN_EMBDOC_END;
9625 }
9626
9627 // Otherwise, we'll parse until the end of the line and return a line of
9628 // embedded documentation.
9629 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9630
9631 if (newline == NULL) {
9632 parser->current.end = parser->end;
9633 } else {
9634 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9635 parser->current.end = newline + 1;
9636 }
9637
9638 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9639 parser_lex_callback(parser);
9640 }
9641
9642 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9643
9644 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9645 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9646
9647 return PM_TOKEN_EOF;
9648}
9649
/**
 * Mark the current token as PM_TOKEN_IGNORED_NEWLINE and report it through the
 * lex callback. The token is only reported to the callback; this function
 * itself returns nothing to the caller.
 */
9655static PRISM_INLINE void
9656parser_lex_ignored_newline(pm_parser_t *parser) {
9657 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9658 parser_lex_callback(parser);
9659}
9660
/**
 * Transfer the pending heredoc end position into parser->next_start and clear
 * parser->heredoc_end. The lexer later resumes from next_start when it is
 * non-NULL. The heredoc end must not lie beyond the end of the input.
 */
9670static PRISM_INLINE void
9671parser_flush_heredoc_end(pm_parser_t *parser) {
9672 assert(parser->heredoc_end <= parser->end);
9673 parser->next_start = parser->heredoc_end;
9674 parser->heredoc_end = NULL;
9675}
9676
9680static bool
9681parser_end_of_line_p(const pm_parser_t *parser) {
9682 const uint8_t *cursor = parser->current.end;
9683
9684 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9685 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9686 }
9687
9688 return true;
9689}
9690
9709typedef struct {
9715
9720 const uint8_t *cursor;
9722
9742
/**
 * Append a single byte to the token buffer's underlying buffer.
 */
9746static PRISM_INLINE void
9747pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9748 pm_buffer_append_byte(&token_buffer->buffer, byte);
9749}
9750
/**
 * Append a single byte to the regexp token buffer's regexp_buffer. Note that
 * only regexp_buffer is written here; the base buffer is left untouched.
 */
9751static PRISM_INLINE void
9752pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9753 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9754}
9755
9759static PRISM_INLINE size_t
9760parser_char_width(const pm_parser_t *parser) {
9761 size_t width;
9762 if (parser->encoding_changed) {
9763 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9764 } else {
9765 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9766 }
9767
9768 // TODO: If the character is invalid in the given encoding, then we'll just
9769 // push one byte into the buffer. This should actually be an error.
9770 return (width == 0 ? 1 : width);
9771}
9772
9776static void
9777pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9778 size_t width = parser_char_width(parser);
9779 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9780 parser->current.end += width;
9781}
9782
9783static void
9784pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9785 size_t width = parser_char_width(parser);
9786 const uint8_t *start = parser->current.end;
9787 pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
9788 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
9789 parser->current.end += width;
9790}
9791
9798static PRISM_INLINE void
9799pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9800 // Copy buffer data into the arena and free the heap buffer.
9801 size_t len = pm_buffer_length(&token_buffer->buffer);
9802 void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
9803 pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
9804 pm_buffer_cleanup(&token_buffer->buffer);
9805}
9806
/**
 * Copy the base token buffer into the parser's current string (via
 * pm_token_buffer_copy, which also cleans up the base buffer) and then clean
 * up the regexp buffer. The contents of regexp_buffer are not copied anywhere
 * by this function.
 */
9807static PRISM_INLINE void
9808pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9809 pm_token_buffer_copy(parser, &token_buffer->base);
9810 pm_buffer_cleanup(&token_buffer->regexp_buffer);
9811}
9812
9822static void
9823pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9824 if (token_buffer->cursor == NULL) {
9825 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9826 } else {
9827 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9828 pm_token_buffer_copy(parser, token_buffer);
9829 }
9830}
9831
9832static void
9833pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9834 if (token_buffer->base.cursor == NULL) {
9835 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9836 } else {
9837 const uint8_t *cursor = token_buffer->base.cursor;
9838 size_t length = (size_t) (parser->current.end - cursor);
9839 pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
9840 pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
9841 pm_regexp_token_buffer_copy(parser, token_buffer);
9842 }
9843}
9844
// Size passed to pm_buffer_init() when a token buffer is initialized by the
// escape functions below (presumably the initial capacity in bytes -- confirm
// against pm_buffer_init).
9845#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9846
9855static void
9856pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9857 const uint8_t *start;
9858 if (token_buffer->cursor == NULL) {
9859 pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9860 start = parser->current.start;
9861 } else {
9862 start = token_buffer->cursor;
9863 }
9864
9865 const uint8_t *end = parser->current.end - 1;
9866 assert(end >= start);
9867 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9868
9869 token_buffer->cursor = end;
9870}
9871
9872static void
9873pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9874 const uint8_t *start;
9875 if (token_buffer->base.cursor == NULL) {
9876 pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9877 pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9878 start = parser->current.start;
9879 } else {
9880 start = token_buffer->base.cursor;
9881 }
9882
9883 const uint8_t *end = parser->current.end - 1;
9884 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9885 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9886
9887 token_buffer->base.cursor = end;
9888}
9889
9890#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9891
9896static PRISM_INLINE size_t
9897pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9898 size_t whitespace = 0;
9899
9900 switch (indent) {
9901 case PM_HEREDOC_INDENT_NONE:
9902 // Do nothing, we can't match a terminator with
9903 // indentation and there's no need to calculate common
9904 // whitespace.
9905 break;
9906 case PM_HEREDOC_INDENT_DASH:
9907 // Skip past inline whitespace.
9908 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9909 break;
9910 case PM_HEREDOC_INDENT_TILDE:
9911 // Skip past inline whitespace and calculate common
9912 // whitespace.
9913 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9914 if (**cursor == '\t') {
9915 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9916 } else {
9917 whitespace++;
9918 }
9919 (*cursor)++;
9920 }
9921
9922 break;
9923 }
9924
9925 return whitespace;
9926}
9927
9932static uint8_t
9933pm_lex_percent_delimiter(pm_parser_t *parser) {
9934 size_t eol_length = match_eol(parser);
9935
9936 if (eol_length) {
9937 if (parser->heredoc_end) {
9938 // If we have already lexed a heredoc, then the newline has already
9939 // been added to the list. In this case we want to just flush the
9940 // heredoc end.
9941 parser_flush_heredoc_end(parser);
9942 } else {
9943 // Otherwise, we'll add the newline to the list of newlines.
9944 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9945 }
9946
9947 uint8_t delimiter = *parser->current.end;
9948
9949 // If our delimiter is \r\n, we want to treat it as if it's \n.
9950 // For example, %\r\nfoo\r\n should be "foo"
9951 if (eol_length == 2) {
9952 delimiter = *(parser->current.end + 1);
9953 }
9954
9955 parser->current.end += eol_length;
9956 return delimiter;
9957 }
9958
9959 return *parser->current.end++;
9960}
9961
// Set the current token's type, invoke the lex callback, and return from the
// enclosing function. The expansion is two statements followed by a bare
// `return` that is completed by the semicolon at the call site, so LEX(type);
// must appear inside a braced block or directly under a case label -- never as
// the unbraced body of an if/else, where only the first statement would be
// conditional.
9966#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9967
9974static void
9975parser_lex(pm_parser_t *parser) {
9976 assert(parser->current.end <= parser->end);
9977 parser->previous = parser->current;
9978
9979 // This value mirrors cmd_state from CRuby.
9980 bool previous_command_start = parser->command_start;
9981 parser->command_start = false;
9982
9983 // This is used to communicate to the newline lexing function that we've
9984 // already seen a comment.
9985 bool lexed_comment = false;
9986
9987 // Here we cache the current value of the semantic token seen flag. This is
9988 // used to reset it in case we find a token that shouldn't flip this flag.
9989 unsigned int semantic_token_seen = parser->semantic_token_seen;
9990 parser->semantic_token_seen = true;
9991
9992 // We'll jump to this label when we are about to encounter an EOF.
9993 // If we still have lex_modes on the stack, we pop them so that cleanup
9994 // can happen. For example, we should still continue parsing after a heredoc
9995 // identifier, even if the heredoc body was syntax invalid.
9996 switch_lex_modes:
9997
9998 switch (parser->lex_modes.current->mode) {
9999 case PM_LEX_DEFAULT:
10000 case PM_LEX_EMBEXPR:
10001 case PM_LEX_EMBVAR:
10002
10003 // We have a specific named label here because we are going to jump back to
10004 // this location in the event that we have lexed a token that should not be
10005 // returned to the parser. This includes comments, ignored newlines, and
10006 // invalid tokens of some form.
10007 lex_next_token: {
10008 // If we have the special next_start pointer set, then we're going to jump
10009 // to that location and start lexing from there.
10010 if (parser->next_start != NULL) {
10011 parser->current.end = parser->next_start;
10012 parser->next_start = NULL;
10013 }
10014
10015 // This value mirrors space_seen from CRuby. It tracks whether or not
10016 // space has been eaten before the start of the next token.
10017 bool space_seen = false;
10018
10019 // First, we're going to skip past any whitespace at the front of the next
10020 // token. Skip runs of inline whitespace in bulk to avoid per-character
10021 // stores back to parser->current.end.
10022 bool chomping = true;
10023 while (parser->current.end < parser->end && chomping) {
10024 {
10025 static const uint8_t inline_whitespace[256] = {
10026 [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
10027 };
10028 const uint8_t *scan = parser->current.end;
10029 while (scan < parser->end && inline_whitespace[*scan]) scan++;
10030 if (scan > parser->current.end) {
10031 parser->current.end = scan;
10032 space_seen = true;
10033 continue;
10034 }
10035 }
10036
10037 switch (*parser->current.end) {
10038 case '\r':
10039 if (match_eol_offset(parser, 1)) {
10040 chomping = false;
10041 } else {
10042 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10043 parser->current.end++;
10044 space_seen = true;
10045 }
10046 break;
10047 case '\\': {
10048 size_t eol_length = match_eol_offset(parser, 1);
10049 if (eol_length) {
10050 if (parser->heredoc_end) {
10051 parser->current.end = parser->heredoc_end;
10052 parser->heredoc_end = NULL;
10053 } else {
10054 parser->current.end += eol_length + 1;
10055 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10056 space_seen = true;
10057 }
10058 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10059 parser->current.end += 2;
10060 } else {
10061 chomping = false;
10062 }
10063
10064 break;
10065 }
10066 default:
10067 chomping = false;
10068 break;
10069 }
10070 }
10071
10072 // Next, we'll set to start of this token to be the current end.
10073 parser->current.start = parser->current.end;
10074
10075 // We'll check if we're at the end of the file. If we are, then we
10076 // need to return the EOF token.
10077 if (parser->current.end >= parser->end) {
10078 // We may be missing closing tokens. We should pop modes one by one
10079 // to do the appropriate cleanup like moving next_start for heredocs.
10080 // Only when no mode is remaining will we actually emit the EOF token.
10081 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
10082 lex_mode_pop(parser);
10083 goto switch_lex_modes;
10084 }
10085
10086 // If we hit EOF, but the EOF came immediately after a newline,
10087 // set the start of the token to the newline. This way any EOF
10088 // errors will be reported as happening on that line rather than
10089 // a line after. For example "foo(\n" should report an error
10090 // on line 1 even though EOF technically occurs on line 2.
10091 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10092 parser->current.start -= 1;
10093 }
10094 LEX(PM_TOKEN_EOF);
10095 }
10096
10097 // Finally, we'll check the current character to determine the next
10098 // token.
10099 switch (*parser->current.end++) {
10100 case '\0': // NUL or end of script
10101 case '\004': // ^D
10102 case '\032': // ^Z
10103 parser->current.end--;
10104 LEX(PM_TOKEN_EOF);
10105
10106 case '#': { // comments
10107 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10108 parser->current.end = ending == NULL ? parser->end : ending;
10109
10110 // If we found a comment while lexing, then we're going to
10111 // add it to the list of comments in the file and keep
10112 // lexing.
10113 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10114 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10115
10116 if (ending) parser->current.end++;
10117 parser->current.type = PM_TOKEN_COMMENT;
10118 parser_lex_callback(parser);
10119
10120 // Here, parse the comment to see if it's a magic comment
10121 // and potentially change state on the parser.
10122 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10123 ptrdiff_t length = parser->current.end - parser->current.start;
10124
10125 // If we didn't find a magic comment within the first
10126 // pass and we're at the start of the file, then we need
10127 // to do another pass to potentially find other patterns
10128 // for encoding comments.
10129 if (length >= 10 && !parser->encoding_locked) {
10130 parser_lex_magic_comment_encoding(parser);
10131 }
10132 }
10133
10134 lexed_comment = true;
10135 }
10137 case '\r':
10138 case '\n': {
10139 parser->semantic_token_seen = semantic_token_seen & 0x1;
10140 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10141
10142 if (eol_length) {
10143 // The only way you can have carriage returns in this
10144 // particular loop is if you have a carriage return
10145 // followed by a newline. In that case we'll just skip
10146 // over the carriage return and continue lexing, in
10147 // order to make it so that the newline token
10148 // encapsulates both the carriage return and the
10149 // newline. Note that we need to check that we haven't
10150 // already lexed a comment here because that falls
10151 // through into here as well.
10152 if (!lexed_comment) {
10153 parser->current.end += eol_length - 1; // skip CR
10154 }
10155
10156 if (parser->heredoc_end == NULL) {
10157 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10158 }
10159 }
10160
10161 if (parser->heredoc_end) {
10162 parser_flush_heredoc_end(parser);
10163 }
10164
10165 // If this is an ignored newline, then we can continue lexing after
10166 // calling the callback with the ignored newline token.
10167 switch (lex_state_ignored_p(parser)) {
10168 case PM_IGNORED_NEWLINE_NONE:
10169 break;
10170 case PM_IGNORED_NEWLINE_PATTERN:
10171 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10172 if (!lexed_comment) parser_lex_ignored_newline(parser);
10173 lex_state_set(parser, PM_LEX_STATE_BEG);
10174 parser->command_start = true;
10175 parser->current.type = PM_TOKEN_NEWLINE;
10176 return;
10177 }
10179 case PM_IGNORED_NEWLINE_ALL:
10180 if (!lexed_comment) parser_lex_ignored_newline(parser);
10181 lexed_comment = false;
10182 goto lex_next_token;
10183 }
10184
10185 // Here we need to look ahead and see if there is a call operator
10186 // (either . or &.) that starts the next line. If there is, then this
10187 // is going to become an ignored newline and we're going to instead
10188 // return the call operator.
10189 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10190 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10191
10192 if (next_content < parser->end) {
10193 // If we hit a comment after a newline, then we're going to check
10194 // if it's ignored or if it's followed by a method call ('.').
10195 // If it is, then we're going to call the
10196 // callback with an ignored newline and then continue lexing.
10197 // Otherwise we'll return a regular newline.
10198 if (next_content[0] == '#') {
10199 // Here we look for a "." or "&." following a "\n".
10200 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10201
10202 while (following && (following + 1 < parser->end)) {
10203 following++;
10204 following += pm_strspn_inline_whitespace(following, parser->end - following);
10205
10206 // If this is not followed by a comment, then we can break out
10207 // of this loop.
10208 if (peek_at(parser, following) != '#') break;
10209
10210 // If there is a comment, then we need to find the end of the
10211 // comment and continue searching from there.
10212 following = next_newline(following, parser->end - following);
10213 }
10214
10215 // If the lex state was ignored, we will lex the
10216 // ignored newline.
10217 if (lex_state_ignored_p(parser)) {
10218 if (!lexed_comment) parser_lex_ignored_newline(parser);
10219 lexed_comment = false;
10220 goto lex_next_token;
10221 }
10222
10223 // If we hit a '.' or a '&.' we will lex the ignored
10224 // newline.
10225 if (following && (
10226 (peek_at(parser, following) == '.') ||
10227 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10228 )) {
10229 if (!lexed_comment) parser_lex_ignored_newline(parser);
10230 lexed_comment = false;
10231 goto lex_next_token;
10232 }
10233
10234
10235 // If we are parsing as CRuby 4.0 or later and we
10236 // hit a '&&' or a '||' then we will lex the ignored
10237 // newline.
10238 if (
10239 (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
10240 following && (
10241 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10242 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10243 (
10244 peek_at(parser, following) == 'a' &&
10245 peek_at(parser, following + 1) == 'n' &&
10246 peek_at(parser, following + 2) == 'd' &&
10247 peek_at(parser, next_content + 3) != '!' &&
10248 peek_at(parser, next_content + 3) != '?' &&
10249 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
10250 ) ||
10251 (
10252 peek_at(parser, following) == 'o' &&
10253 peek_at(parser, following + 1) == 'r' &&
10254 peek_at(parser, next_content + 2) != '!' &&
10255 peek_at(parser, next_content + 2) != '?' &&
10256 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
10257 )
10258 )
10259 ) {
10260 if (!lexed_comment) parser_lex_ignored_newline(parser);
10261 lexed_comment = false;
10262 goto lex_next_token;
10263 }
10264 }
10265
10266 // If we hit a . after a newline, then we're in a call chain and
10267 // we need to return the call operator.
10268 if (next_content[0] == '.') {
10269 // To match ripper, we need to emit an ignored newline even though
10270 // it's a real newline in the case that we have a beginless range
10271 // on a subsequent line.
10272 if (peek_at(parser, next_content + 1) == '.') {
10273 if (!lexed_comment) parser_lex_ignored_newline(parser);
10274 lex_state_set(parser, PM_LEX_STATE_BEG);
10275 parser->command_start = true;
10276 parser->current.type = PM_TOKEN_NEWLINE;
10277 return;
10278 }
10279
10280 if (!lexed_comment) parser_lex_ignored_newline(parser);
10281 lex_state_set(parser, PM_LEX_STATE_DOT);
10282 parser->current.start = next_content;
10283 parser->current.end = next_content + 1;
10284 parser->next_start = NULL;
10285 LEX(PM_TOKEN_DOT);
10286 }
10287
10288 // If we hit a &. after a newline, then we're in a call chain and
10289 // we need to return the call operator.
10290 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10291 if (!lexed_comment) parser_lex_ignored_newline(parser);
10292 lex_state_set(parser, PM_LEX_STATE_DOT);
10293 parser->current.start = next_content;
10294 parser->current.end = next_content + 2;
10295 parser->next_start = NULL;
10296 LEX(PM_TOKEN_AMPERSAND_DOT);
10297 }
10298
10299 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10300 // If we hit an && then we are in a logical chain
10301 // and we need to return the logical operator.
10302 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10303 if (!lexed_comment) parser_lex_ignored_newline(parser);
10304 lex_state_set(parser, PM_LEX_STATE_BEG);
10305 parser->current.start = next_content;
10306 parser->current.end = next_content + 2;
10307 parser->next_start = NULL;
10308 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10309 }
10310
10311 // If we hit a || then we are in a logical chain and
10312 // we need to return the logical operator.
10313 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10314 if (!lexed_comment) parser_lex_ignored_newline(parser);
10315 lex_state_set(parser, PM_LEX_STATE_BEG);
10316 parser->current.start = next_content;
10317 parser->current.end = next_content + 2;
10318 parser->next_start = NULL;
10319 LEX(PM_TOKEN_PIPE_PIPE);
10320 }
10321
10322 // If we hit an 'and' then we are in a logical chain
10323 // and we need to return the logical operator.
10324 if (
10325 peek_at(parser, next_content) == 'a' &&
10326 peek_at(parser, next_content + 1) == 'n' &&
10327 peek_at(parser, next_content + 2) == 'd' &&
10328 peek_at(parser, next_content + 3) != '!' &&
10329 peek_at(parser, next_content + 3) != '?' &&
10330 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10331 ) {
10332 if (!lexed_comment) parser_lex_ignored_newline(parser);
10333 lex_state_set(parser, PM_LEX_STATE_BEG);
10334 parser->current.start = next_content;
10335 parser->current.end = next_content + 3;
10336 parser->next_start = NULL;
10337 parser->command_start = true;
10338 LEX(PM_TOKEN_KEYWORD_AND);
10339 }
10340
10341 // If we hit an 'or' then we are in a logical chain
10342 // and we need to return the logical operator.
10343 if (
10344 peek_at(parser, next_content) == 'o' &&
10345 peek_at(parser, next_content + 1) == 'r' &&
10346 peek_at(parser, next_content + 2) != '!' &&
10347 peek_at(parser, next_content + 2) != '?' &&
10348 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10349 ) {
10350 if (!lexed_comment) parser_lex_ignored_newline(parser);
10351 lex_state_set(parser, PM_LEX_STATE_BEG);
10352 parser->current.start = next_content;
10353 parser->current.end = next_content + 2;
10354 parser->next_start = NULL;
10355 parser->command_start = true;
10356 LEX(PM_TOKEN_KEYWORD_OR);
10357 }
10358 }
10359 }
10360
10361 // At this point we know this is a regular newline, and we can set the
10362 // necessary state and return the token.
10363 lex_state_set(parser, PM_LEX_STATE_BEG);
10364 parser->command_start = true;
10365 parser->current.type = PM_TOKEN_NEWLINE;
10366 if (!lexed_comment) parser_lex_callback(parser);
10367 return;
10368 }
10369
10370 // ,
10371 case ',':
10372 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10373 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
10374 }
10375
10376 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10377 LEX(PM_TOKEN_COMMA);
10378
10379 // (
10380 case '(': {
10381 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10382
10383 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10384 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10385 }
10386
10387 parser->enclosure_nesting++;
10388 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10389 pm_do_loop_stack_push(parser, false);
10390 LEX(type);
10391 }
10392
10393 // )
10394 case ')':
10395 parser->enclosure_nesting--;
10396 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10397 pm_do_loop_stack_pop(parser);
10398 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10399
10400 // ;
10401 case ';':
10402 lex_state_set(parser, PM_LEX_STATE_BEG);
10403 parser->command_start = true;
10404 LEX(PM_TOKEN_SEMICOLON);
10405
10406 // [ [] []=
10407 case '[':
10408 parser->enclosure_nesting++;
10409 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10410
10411 if (lex_state_operator_p(parser)) {
10412 if (match(parser, ']')) {
10413 parser->enclosure_nesting--;
10414 lex_state_set(parser, PM_LEX_STATE_ARG);
10415 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10416 }
10417
10418 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10419 LEX(type);
10420 }
10421
10422 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10423 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10424 }
10425
10426 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10427 pm_do_loop_stack_push(parser, false);
10428 LEX(type);
10429
10430 // ]
10431 case ']':
10432 parser->enclosure_nesting--;
10433 lex_state_set(parser, PM_LEX_STATE_END);
10434 pm_do_loop_stack_pop(parser);
10435 LEX(PM_TOKEN_BRACKET_RIGHT);
10436
10437 // {
10438 case '{': {
10439 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10440
10441 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10442 // This { begins a lambda
10443 parser->command_start = true;
10444 lex_state_set(parser, PM_LEX_STATE_BEG);
10445 type = PM_TOKEN_LAMBDA_BEGIN;
10446 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10447 // This { begins a hash literal
10448 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10449 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10450 // This { begins a block
10451 parser->command_start = true;
10452 lex_state_set(parser, PM_LEX_STATE_BEG);
10453 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10454 // This { begins a block on a command
10455 parser->command_start = true;
10456 lex_state_set(parser, PM_LEX_STATE_BEG);
10457 } else {
10458 // This { begins a hash literal
10459 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10460 }
10461
10462 parser->enclosure_nesting++;
10463 parser->brace_nesting++;
10464 pm_do_loop_stack_push(parser, false);
10465
10466 LEX(type);
10467 }
10468
10469 // }
10470 case '}':
10471 parser->enclosure_nesting--;
10472 pm_do_loop_stack_pop(parser);
10473
10474 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10475 lex_mode_pop(parser);
10476 LEX(PM_TOKEN_EMBEXPR_END);
10477 }
10478
10479 parser->brace_nesting--;
10480 lex_state_set(parser, PM_LEX_STATE_END);
10481 LEX(PM_TOKEN_BRACE_RIGHT);
10482
10483 // * ** **= *=
10484 case '*': {
10485 if (match(parser, '*')) {
10486 if (match(parser, '=')) {
10487 lex_state_set(parser, PM_LEX_STATE_BEG);
10488 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10489 }
10490
10491 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10492
10493 if (lex_state_spcarg_p(parser, space_seen)) {
10494 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10495 type = PM_TOKEN_USTAR_STAR;
10496 } else if (lex_state_beg_p(parser)) {
10497 type = PM_TOKEN_USTAR_STAR;
10498 } else if (ambiguous_operator_p(parser, space_seen)) {
10499 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10500 }
10501
10502 if (lex_state_operator_p(parser)) {
10503 lex_state_set(parser, PM_LEX_STATE_ARG);
10504 } else {
10505 lex_state_set(parser, PM_LEX_STATE_BEG);
10506 }
10507
10508 LEX(type);
10509 }
10510
10511 if (match(parser, '=')) {
10512 lex_state_set(parser, PM_LEX_STATE_BEG);
10513 LEX(PM_TOKEN_STAR_EQUAL);
10514 }
10515
10516 pm_token_type_t type = PM_TOKEN_STAR;
10517
10518 if (lex_state_spcarg_p(parser, space_seen)) {
10519 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10520 type = PM_TOKEN_USTAR;
10521 } else if (lex_state_beg_p(parser)) {
10522 type = PM_TOKEN_USTAR;
10523 } else if (ambiguous_operator_p(parser, space_seen)) {
10524 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10525 }
10526
10527 if (lex_state_operator_p(parser)) {
10528 lex_state_set(parser, PM_LEX_STATE_ARG);
10529 } else {
10530 lex_state_set(parser, PM_LEX_STATE_BEG);
10531 }
10532
10533 LEX(type);
10534 }
10535
10536 // ! != !~ !@
10537 case '!':
10538 if (lex_state_operator_p(parser)) {
10539 lex_state_set(parser, PM_LEX_STATE_ARG);
10540 if (match(parser, '@')) {
10541 LEX(PM_TOKEN_BANG);
10542 }
10543 } else {
10544 lex_state_set(parser, PM_LEX_STATE_BEG);
10545 }
10546
10547 if (match(parser, '=')) {
10548 LEX(PM_TOKEN_BANG_EQUAL);
10549 }
10550
10551 if (match(parser, '~')) {
10552 LEX(PM_TOKEN_BANG_TILDE);
10553 }
10554
10555 LEX(PM_TOKEN_BANG);
10556
10557 // = => =~ == === =begin
10558 case '=':
10559 if (
10560 current_token_starts_line(parser) &&
10561 (parser->current.end + 5 <= parser->end) &&
10562 memcmp(parser->current.end, "begin", 5) == 0 &&
10563 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10564 ) {
10565 pm_token_type_t type = lex_embdoc(parser);
10566 if (type == PM_TOKEN_EOF) {
10567 LEX(type);
10568 }
10569
10570 goto lex_next_token;
10571 }
10572
10573 if (lex_state_operator_p(parser)) {
10574 lex_state_set(parser, PM_LEX_STATE_ARG);
10575 } else {
10576 lex_state_set(parser, PM_LEX_STATE_BEG);
10577 }
10578
10579 if (match(parser, '>')) {
10580 LEX(PM_TOKEN_EQUAL_GREATER);
10581 }
10582
10583 if (match(parser, '~')) {
10584 LEX(PM_TOKEN_EQUAL_TILDE);
10585 }
10586
10587 if (match(parser, '=')) {
10588 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10589 }
10590
10591 LEX(PM_TOKEN_EQUAL);
10592
10593 // < << <<= <= <=>
10594 case '<':
10595 if (match(parser, '<')) {
10596 if (
10597 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10598 !lex_state_end_p(parser) &&
10599 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10600 ) {
10601 const uint8_t *end = parser->current.end;
10602
10603 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10604 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10605
10606 if (match(parser, '-')) {
10607 indent = PM_HEREDOC_INDENT_DASH;
10608 }
10609 else if (match(parser, '~')) {
10610 indent = PM_HEREDOC_INDENT_TILDE;
10611 }
10612
10613 if (match(parser, '`')) {
10614 quote = PM_HEREDOC_QUOTE_BACKTICK;
10615 }
10616 else if (match(parser, '"')) {
10617 quote = PM_HEREDOC_QUOTE_DOUBLE;
10618 }
10619 else if (match(parser, '\'')) {
10620 quote = PM_HEREDOC_QUOTE_SINGLE;
10621 }
10622
10623 const uint8_t *ident_start = parser->current.end;
10624 size_t width = 0;
10625
10626 if (parser->current.end >= parser->end) {
10627 parser->current.end = end;
10628 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10629 parser->current.end = end;
10630 } else {
10631 if (quote == PM_HEREDOC_QUOTE_NONE) {
10632 parser->current.end += width;
10633
10634 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10635 parser->current.end += width;
10636 }
10637 } else {
10638 // If we have quotes, then we're going to go until we find the
10639 // end quote.
10640 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10641 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10642 parser->current.end++;
10643 }
10644 }
10645
10646 size_t ident_length = (size_t) (parser->current.end - ident_start);
10647 bool ident_error = false;
10648
10649 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10650 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10651 ident_error = true;
10652 }
10653
10654 parser->explicit_encoding = NULL;
10655 lex_mode_push(parser, (pm_lex_mode_t) {
10656 .mode = PM_LEX_HEREDOC,
10657 .as.heredoc = {
10658 .base = {
10659 .ident_start = ident_start,
10660 .ident_length = ident_length,
10661 .quote = quote,
10662 .indent = indent
10663 },
10664 .next_start = parser->current.end,
10665 .common_whitespace = NULL,
10666 .line_continuation = false
10667 }
10668 });
10669
10670 if (parser->heredoc_end == NULL) {
10671 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10672
10673 if (body_start == NULL) {
10674 // If there is no newline after the heredoc identifier, then
10675 // this is not a valid heredoc declaration. In this case we
10676 // will add an error, but we will still return a heredoc
10677 // start.
10678 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10679 body_start = parser->end;
10680 } else {
10681 // Otherwise, we want to indicate that the body of the
10682 // heredoc starts on the character after the next newline.
10683 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
10684 body_start++;
10685 }
10686
10687 parser->next_start = body_start;
10688 } else {
10689 parser->next_start = parser->heredoc_end;
10690 }
10691
10692 LEX(PM_TOKEN_HEREDOC_START);
10693 }
10694 }
10695
10696 if (match(parser, '=')) {
10697 lex_state_set(parser, PM_LEX_STATE_BEG);
10698 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10699 }
10700
10701 if (ambiguous_operator_p(parser, space_seen)) {
10702 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10703 }
10704
10705 if (lex_state_operator_p(parser)) {
10706 lex_state_set(parser, PM_LEX_STATE_ARG);
10707 } else {
10708 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10709 lex_state_set(parser, PM_LEX_STATE_BEG);
10710 }
10711
10712 LEX(PM_TOKEN_LESS_LESS);
10713 }
10714
10715 if (lex_state_operator_p(parser)) {
10716 lex_state_set(parser, PM_LEX_STATE_ARG);
10717 } else {
10718 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10719 lex_state_set(parser, PM_LEX_STATE_BEG);
10720 }
10721
10722 if (match(parser, '=')) {
10723 if (match(parser, '>')) {
10724 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10725 }
10726
10727 LEX(PM_TOKEN_LESS_EQUAL);
10728 }
10729
10730 LEX(PM_TOKEN_LESS);
10731
10732 // > >> >>= >=
10733 case '>':
10734 if (match(parser, '>')) {
10735 if (lex_state_operator_p(parser)) {
10736 lex_state_set(parser, PM_LEX_STATE_ARG);
10737 } else {
10738 lex_state_set(parser, PM_LEX_STATE_BEG);
10739 }
10740 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10741 }
10742
10743 if (lex_state_operator_p(parser)) {
10744 lex_state_set(parser, PM_LEX_STATE_ARG);
10745 } else {
10746 lex_state_set(parser, PM_LEX_STATE_BEG);
10747 }
10748
10749 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10750
10751 // double-quoted string literal
10752 case '"': {
10753 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10754 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10755 LEX(PM_TOKEN_STRING_BEGIN);
10756 }
10757
10758 // xstring literal
10759 case '`': {
10760 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10761 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10762 LEX(PM_TOKEN_BACKTICK);
10763 }
10764
10765 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10766 if (previous_command_start) {
10767 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10768 } else {
10769 lex_state_set(parser, PM_LEX_STATE_ARG);
10770 }
10771
10772 LEX(PM_TOKEN_BACKTICK);
10773 }
10774
10775 lex_mode_push_string(parser, true, false, '\0', '`');
10776 LEX(PM_TOKEN_BACKTICK);
10777 }
10778
10779 // single-quoted string literal
10780 case '\'': {
10781 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10782 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10783 LEX(PM_TOKEN_STRING_BEGIN);
10784 }
10785
10786 // ? character literal
10787 case '?':
10788 LEX(lex_question_mark(parser));
10789
10790 // & && &&= &=
10791 case '&': {
10792 if (match(parser, '&')) {
10793 lex_state_set(parser, PM_LEX_STATE_BEG);
10794
10795 if (match(parser, '=')) {
10796 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10797 }
10798
10799 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10800 }
10801
10802 if (match(parser, '=')) {
10803 lex_state_set(parser, PM_LEX_STATE_BEG);
10804 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10805 }
10806
10807 if (match(parser, '.')) {
10808 lex_state_set(parser, PM_LEX_STATE_DOT);
10809 LEX(PM_TOKEN_AMPERSAND_DOT);
10810 }
10811
10812 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10813 if (lex_state_spcarg_p(parser, space_seen)) {
10814 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10815 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10816 } else {
10817 const uint8_t delim = peek_offset(parser, 1);
10818
10819 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10820 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10821 }
10822 }
10823
10824 type = PM_TOKEN_UAMPERSAND;
10825 } else if (lex_state_beg_p(parser)) {
10826 type = PM_TOKEN_UAMPERSAND;
10827 } else if (ambiguous_operator_p(parser, space_seen)) {
10828 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10829 }
10830
10831 if (lex_state_operator_p(parser)) {
10832 lex_state_set(parser, PM_LEX_STATE_ARG);
10833 } else {
10834 lex_state_set(parser, PM_LEX_STATE_BEG);
10835 }
10836
10837 LEX(type);
10838 }
10839
10840 // | || ||= |=
10841 case '|':
10842 if (match(parser, '|')) {
10843 if (match(parser, '=')) {
10844 lex_state_set(parser, PM_LEX_STATE_BEG);
10845 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10846 }
10847
10848 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10849 parser->current.end--;
10850 LEX(PM_TOKEN_PIPE);
10851 }
10852
10853 lex_state_set(parser, PM_LEX_STATE_BEG);
10854 LEX(PM_TOKEN_PIPE_PIPE);
10855 }
10856
10857 if (match(parser, '=')) {
10858 lex_state_set(parser, PM_LEX_STATE_BEG);
10859 LEX(PM_TOKEN_PIPE_EQUAL);
10860 }
10861
10862 if (lex_state_operator_p(parser)) {
10863 lex_state_set(parser, PM_LEX_STATE_ARG);
10864 } else {
10865 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10866 }
10867
10868 LEX(PM_TOKEN_PIPE);
10869
10870 // + += +@
10871 case '+': {
10872 if (lex_state_operator_p(parser)) {
10873 lex_state_set(parser, PM_LEX_STATE_ARG);
10874
10875 if (match(parser, '@')) {
10876 LEX(PM_TOKEN_UPLUS);
10877 }
10878
10879 LEX(PM_TOKEN_PLUS);
10880 }
10881
10882 if (match(parser, '=')) {
10883 lex_state_set(parser, PM_LEX_STATE_BEG);
10884 LEX(PM_TOKEN_PLUS_EQUAL);
10885 }
10886
10887 if (
10888 lex_state_beg_p(parser) ||
10889 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10890 ) {
10891 lex_state_set(parser, PM_LEX_STATE_BEG);
10892
10893 if (pm_char_is_decimal_digit(peek(parser))) {
10894 parser->current.end++;
10895 pm_token_type_t type = lex_numeric(parser);
10896 lex_state_set(parser, PM_LEX_STATE_END);
10897 LEX(type);
10898 }
10899
10900 LEX(PM_TOKEN_UPLUS);
10901 }
10902
10903 if (ambiguous_operator_p(parser, space_seen)) {
10904 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10905 }
10906
10907 lex_state_set(parser, PM_LEX_STATE_BEG);
10908 LEX(PM_TOKEN_PLUS);
10909 }
10910
10911 // - -= -@
10912 case '-': {
10913 if (lex_state_operator_p(parser)) {
10914 lex_state_set(parser, PM_LEX_STATE_ARG);
10915
10916 if (match(parser, '@')) {
10917 LEX(PM_TOKEN_UMINUS);
10918 }
10919
10920 LEX(PM_TOKEN_MINUS);
10921 }
10922
10923 if (match(parser, '=')) {
10924 lex_state_set(parser, PM_LEX_STATE_BEG);
10925 LEX(PM_TOKEN_MINUS_EQUAL);
10926 }
10927
10928 if (match(parser, '>')) {
10929 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10930 LEX(PM_TOKEN_MINUS_GREATER);
10931 }
10932
10933 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10934 bool is_beg = lex_state_beg_p(parser);
10935 if (!is_beg && spcarg) {
10936 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10937 }
10938
10939 if (is_beg || spcarg) {
10940 lex_state_set(parser, PM_LEX_STATE_BEG);
10941 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10942 }
10943
10944 if (ambiguous_operator_p(parser, space_seen)) {
10945 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10946 }
10947
10948 lex_state_set(parser, PM_LEX_STATE_BEG);
10949 LEX(PM_TOKEN_MINUS);
10950 }
10951
10952 // . .. ...
10953 case '.': {
10954 bool beg_p = lex_state_beg_p(parser);
10955
10956 if (match(parser, '.')) {
10957 if (match(parser, '.')) {
10958 // If we're _not_ inside a range within default parameters
10959 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10960 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10961 lex_state_set(parser, PM_LEX_STATE_BEG);
10962 } else {
10963 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10964 }
10965 LEX(PM_TOKEN_UDOT_DOT_DOT);
10966 }
10967
10968 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10969 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10970 }
10971
10972 lex_state_set(parser, PM_LEX_STATE_BEG);
10973 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10974 }
10975
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10977 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10978 }
10979
10980 lex_state_set(parser, PM_LEX_STATE_DOT);
10981 LEX(PM_TOKEN_DOT);
10982 }
10983
10984 // integer
10985 case '0':
10986 case '1':
10987 case '2':
10988 case '3':
10989 case '4':
10990 case '5':
10991 case '6':
10992 case '7':
10993 case '8':
10994 case '9': {
10995 pm_token_type_t type = lex_numeric(parser);
10996 lex_state_set(parser, PM_LEX_STATE_END);
10997 LEX(type);
10998 }
10999
11000 // :: symbol
11001 case ':':
11002 if (match(parser, ':')) {
11003 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11004 lex_state_set(parser, PM_LEX_STATE_BEG);
11005 LEX(PM_TOKEN_UCOLON_COLON);
11006 }
11007
11008 lex_state_set(parser, PM_LEX_STATE_DOT);
11009 LEX(PM_TOKEN_COLON_COLON);
11010 }
11011
11012 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11013 lex_state_set(parser, PM_LEX_STATE_BEG);
11014 LEX(PM_TOKEN_COLON);
11015 }
11016
11017 if (peek(parser) == '"' || peek(parser) == '\'') {
11018 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11019 parser->current.end++;
11020 }
11021
11022 lex_state_set(parser, PM_LEX_STATE_FNAME);
11023 LEX(PM_TOKEN_SYMBOL_BEGIN);
11024
11025 // / /=
11026 case '/':
11027 if (lex_state_beg_p(parser)) {
11028 lex_mode_push_regexp(parser, '\0', '/');
11029 LEX(PM_TOKEN_REGEXP_BEGIN);
11030 }
11031
11032 if (match(parser, '=')) {
11033 lex_state_set(parser, PM_LEX_STATE_BEG);
11034 LEX(PM_TOKEN_SLASH_EQUAL);
11035 }
11036
11037 if (lex_state_spcarg_p(parser, space_seen)) {
11038 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11039 lex_mode_push_regexp(parser, '\0', '/');
11040 LEX(PM_TOKEN_REGEXP_BEGIN);
11041 }
11042
11043 if (ambiguous_operator_p(parser, space_seen)) {
11044 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11045 }
11046
11047 if (lex_state_operator_p(parser)) {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 } else {
11050 lex_state_set(parser, PM_LEX_STATE_BEG);
11051 }
11052
11053 LEX(PM_TOKEN_SLASH);
11054
11055 // ^ ^=
11056 case '^':
11057 if (lex_state_operator_p(parser)) {
11058 lex_state_set(parser, PM_LEX_STATE_ARG);
11059 } else {
11060 lex_state_set(parser, PM_LEX_STATE_BEG);
11061 }
11062 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11063
11064 // ~ ~@
11065 case '~':
11066 if (lex_state_operator_p(parser)) {
11067 (void) match(parser, '@');
11068 lex_state_set(parser, PM_LEX_STATE_ARG);
11069 } else {
11070 lex_state_set(parser, PM_LEX_STATE_BEG);
11071 }
11072
11073 LEX(PM_TOKEN_TILDE);
11074
11075 // % %= %i %I %q %Q %r %s %w %W %x
11076 case '%': {
11077 // If there is no subsequent character then we have an
11078 // invalid token. We're going to say it's the percent
11079 // operator because we don't want to move into the string
11080 // lex mode unnecessarily.
11081 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11082 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11083 LEX(PM_TOKEN_PERCENT);
11084 }
11085
11086 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11087 lex_state_set(parser, PM_LEX_STATE_BEG);
11088 LEX(PM_TOKEN_PERCENT_EQUAL);
11089 } else if (
11090 lex_state_beg_p(parser) ||
11091 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11092 lex_state_spcarg_p(parser, space_seen)
11093 ) {
11094 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11095 if (*parser->current.end >= 0x80) {
11096 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11097 }
11098
11099 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11100 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11101 LEX(PM_TOKEN_STRING_BEGIN);
11102 }
11103
11104 // Delimiters for %-literals cannot be alphanumeric. We
11105 // validate that here.
11106 uint8_t delimiter = peek_offset(parser, 1);
11107 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11108 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11109 goto lex_next_token;
11110 }
11111
11112 switch (peek(parser)) {
11113 case 'i': {
11114 parser->current.end++;
11115
11116 if (parser->current.end < parser->end) {
11117 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11118 } else {
11119 lex_mode_push_list_eof(parser);
11120 }
11121
11122 LEX(PM_TOKEN_PERCENT_LOWER_I);
11123 }
11124 case 'I': {
11125 parser->current.end++;
11126
11127 if (parser->current.end < parser->end) {
11128 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11129 } else {
11130 lex_mode_push_list_eof(parser);
11131 }
11132
11133 LEX(PM_TOKEN_PERCENT_UPPER_I);
11134 }
11135 case 'r': {
11136 parser->current.end++;
11137
11138 if (parser->current.end < parser->end) {
11139 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11140 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11141 } else {
11142 lex_mode_push_regexp(parser, '\0', '\0');
11143 }
11144
11145 LEX(PM_TOKEN_REGEXP_BEGIN);
11146 }
11147 case 'q': {
11148 parser->current.end++;
11149
11150 if (parser->current.end < parser->end) {
11151 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11152 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11153 } else {
11154 lex_mode_push_string_eof(parser);
11155 }
11156
11157 LEX(PM_TOKEN_STRING_BEGIN);
11158 }
11159 case 'Q': {
11160 parser->current.end++;
11161
11162 if (parser->current.end < parser->end) {
11163 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11164 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11165 } else {
11166 lex_mode_push_string_eof(parser);
11167 }
11168
11169 LEX(PM_TOKEN_STRING_BEGIN);
11170 }
11171 case 's': {
11172 parser->current.end++;
11173
11174 if (parser->current.end < parser->end) {
11175 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11176 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11177 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11178 } else {
11179 lex_mode_push_string_eof(parser);
11180 }
11181
11182 LEX(PM_TOKEN_SYMBOL_BEGIN);
11183 }
11184 case 'w': {
11185 parser->current.end++;
11186
11187 if (parser->current.end < parser->end) {
11188 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11189 } else {
11190 lex_mode_push_list_eof(parser);
11191 }
11192
11193 LEX(PM_TOKEN_PERCENT_LOWER_W);
11194 }
11195 case 'W': {
11196 parser->current.end++;
11197
11198 if (parser->current.end < parser->end) {
11199 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11200 } else {
11201 lex_mode_push_list_eof(parser);
11202 }
11203
11204 LEX(PM_TOKEN_PERCENT_UPPER_W);
11205 }
11206 case 'x': {
11207 parser->current.end++;
11208
11209 if (parser->current.end < parser->end) {
11210 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11211 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11212 } else {
11213 lex_mode_push_string_eof(parser);
11214 }
11215
11216 LEX(PM_TOKEN_PERCENT_LOWER_X);
11217 }
11218 default:
11219 // If we get to this point, then we have a % that is completely
11220 // unparsable. In this case we'll just drop it from the parser
11221 // and skip past it and hope that the next token is something
11222 // that we can parse.
11223 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11224 goto lex_next_token;
11225 }
11226 }
11227
11228 if (ambiguous_operator_p(parser, space_seen)) {
11229 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11230 }
11231
11232 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11233 LEX(PM_TOKEN_PERCENT);
11234 }
11235
11236 // global variable
11237 case '$': {
11238 pm_token_type_t type = lex_global_variable(parser);
11239
11240 // If we're lexing an embedded variable, then we need to pop back into
11241 // the parent lex context.
11242 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11243 lex_mode_pop(parser);
11244 }
11245
11246 lex_state_set(parser, PM_LEX_STATE_END);
11247 LEX(type);
11248 }
11249
11250 // instance variable, class variable
11251 case '@':
11252 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11253 LEX(lex_at_variable(parser));
11254
11255 default: {
11256 if (*parser->current.start != '_') {
11257 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11258
11259 // If this isn't the beginning of an identifier, then
11260 // it's an invalid token as we've exhausted all of the
11261 // other options. We'll skip past it and return the next
11262 // token after adding an appropriate error message.
11263 if (!width) {
11264 if (*parser->current.start >= 0x80) {
11265 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11266 } else if (*parser->current.start == '\\') {
11267 switch (peek_at(parser, parser->current.start + 1)) {
11268 case ' ':
11269 parser->current.end++;
11270 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11271 break;
11272 case '\f':
11273 parser->current.end++;
11274 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11275 break;
11276 case '\t':
11277 parser->current.end++;
11278 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11279 break;
11280 case '\v':
11281 parser->current.end++;
11282 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11283 break;
11284 case '\r':
11285 if (peek_at(parser, parser->current.start + 2) != '\n') {
11286 parser->current.end++;
11287 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11288 break;
11289 }
11291 default:
11292 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11293 break;
11294 }
11295 } else if (char_is_ascii_printable(*parser->current.start)) {
11296 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11297 } else {
11298 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11299 }
11300
11301 goto lex_next_token;
11302 }
11303
11304 parser->current.end = parser->current.start + width;
11305 }
11306
11307 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11308
11309 // If we've hit a __END__ and it was at the start of the
11310 // line or the start of the file and it is followed by
11311 // either a \n or a \r\n, then this is the last token of the
11312 // file.
11313 if (
11314 ((parser->current.end - parser->current.start) == 7) &&
11315 current_token_starts_line(parser) &&
11316 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11317 (parser->current.end == parser->end || match_eol(parser))
11318 ) {
11319 // Since we know we're about to add an __END__ comment,
11320 // we know we need to add all of the newlines to get the
11321 // correct column information for it.
11322 const uint8_t *cursor = parser->current.end;
11323 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11324 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
11325 }
11326
11327 parser->current.end = parser->end;
11328 parser->current.type = PM_TOKEN___END__;
11329 parser_lex_callback(parser);
11330
11331 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
11332 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
11333
11334 LEX(PM_TOKEN_EOF);
11335 }
11336
11337 pm_lex_state_t last_state = parser->lex_state;
11338
11339 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11340 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11341 if (previous_command_start) {
11342 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11343 } else {
11344 lex_state_set(parser, PM_LEX_STATE_ARG);
11345 }
11346 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11347 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11348 } else {
11349 lex_state_set(parser, PM_LEX_STATE_END);
11350 }
11351 }
11352
11353 if (
11354 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11355 (type == PM_TOKEN_IDENTIFIER) &&
11356 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11357 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
11358 ) {
11359 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11360 }
11361
11362 LEX(type);
11363 }
11364 }
11365 }
11366 case PM_LEX_LIST: {
11367 if (parser->next_start != NULL) {
11368 parser->current.end = parser->next_start;
11369 parser->next_start = NULL;
11370 }
11371
11372 // First we'll set the beginning of the token.
11373 parser->current.start = parser->current.end;
11374
11375 // If there's any whitespace at the start of the list, then we're
11376 // going to trim it off the beginning and create a new token.
11377 size_t whitespace;
11378
11379 if (parser->heredoc_end) {
11380 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11381 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11382 whitespace += 1;
11383 }
11384 } else {
11385 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11386 }
11387
11388 if (whitespace > 0) {
11389 parser->current.end += whitespace;
11390 if (peek_offset(parser, -1) == '\n') {
11391 // mutates next_start
11392 parser_flush_heredoc_end(parser);
11393 }
11394 LEX(PM_TOKEN_WORDS_SEP);
11395 }
11396
11397 // We'll check if we're at the end of the file. If we are, then we
11398 // need to return the EOF token.
11399 if (parser->current.end >= parser->end) {
11400 LEX(PM_TOKEN_EOF);
11401 }
11402
11403 // Here we'll get a list of the places where strpbrk should break,
11404 // and then find the first one.
11405 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11406 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11407 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11408
11409 // If we haven't found an escape yet, then this buffer will be
11410 // unallocated since we can refer directly to the source string.
11411 pm_token_buffer_t token_buffer = { 0 };
11412
11413 while (breakpoint != NULL) {
11414 // If we hit whitespace, then we must have received content by
11415 // now, so we can return an element of the list.
11416 if (pm_char_is_whitespace(*breakpoint)) {
11417 parser->current.end = breakpoint;
11418 pm_token_buffer_flush(parser, &token_buffer);
11419 LEX(PM_TOKEN_STRING_CONTENT);
11420 }
11421
11422 // If we hit the terminator, we need to check which token to
11423 // return.
11424 if (*breakpoint == lex_mode->as.list.terminator) {
11425 // If this terminator doesn't actually close the list, then
11426 // we need to continue on past it.
11427 if (lex_mode->as.list.nesting > 0) {
11428 parser->current.end = breakpoint + 1;
11429 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11430 lex_mode->as.list.nesting--;
11431 continue;
11432 }
11433
11434 // If we've hit the terminator and we've already skipped
11435 // past content, then we can return a list node.
11436 if (breakpoint > parser->current.start) {
11437 parser->current.end = breakpoint;
11438 pm_token_buffer_flush(parser, &token_buffer);
11439 LEX(PM_TOKEN_STRING_CONTENT);
11440 }
11441
11442 // Otherwise, switch back to the default state and return
11443 // the end of the list.
11444 parser->current.end = breakpoint + 1;
11445 lex_mode_pop(parser);
11446 lex_state_set(parser, PM_LEX_STATE_END);
11447 LEX(PM_TOKEN_STRING_END);
11448 }
11449
11450 // If we hit a null byte, skip directly past it.
11451 if (*breakpoint == '\0') {
11452 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11453 continue;
11454 }
11455
11456 // If we hit escapes, then we need to treat the next token
11457 // literally. In this case we'll skip past the next character
11458 // and find the next breakpoint.
11459 if (*breakpoint == '\\') {
11460 parser->current.end = breakpoint + 1;
11461
11462 // If we've hit the end of the file, then break out of the
11463 // loop by setting the breakpoint to NULL.
11464 if (parser->current.end == parser->end) {
11465 breakpoint = NULL;
11466 continue;
11467 }
11468
11469 pm_token_buffer_escape(parser, &token_buffer);
11470 uint8_t peeked = peek(parser);
11471
11472 switch (peeked) {
11473 case ' ':
11474 case '\f':
11475 case '\t':
11476 case '\v':
11477 case '\\':
11478 pm_token_buffer_push_byte(&token_buffer, peeked);
11479 parser->current.end++;
11480 break;
11481 case '\r':
11482 parser->current.end++;
11483 if (peek(parser) != '\n') {
11484 pm_token_buffer_push_byte(&token_buffer, '\r');
11485 break;
11486 }
11488 case '\n':
11489 pm_token_buffer_push_byte(&token_buffer, '\n');
11490
11491 if (parser->heredoc_end) {
11492 // ... if we are on the same line as a heredoc,
11493 // flush the heredoc and continue parsing after
11494 // heredoc_end.
11495 parser_flush_heredoc_end(parser);
11496 pm_token_buffer_copy(parser, &token_buffer);
11497 LEX(PM_TOKEN_STRING_CONTENT);
11498 } else {
11499 // ... else track the newline.
11500 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11501 }
11502
11503 parser->current.end++;
11504 break;
11505 default:
11506 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11507 pm_token_buffer_push_byte(&token_buffer, peeked);
11508 parser->current.end++;
11509 } else if (lex_mode->as.list.interpolation) {
11510 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11511 } else {
11512 pm_token_buffer_push_byte(&token_buffer, '\\');
11513 pm_token_buffer_push_escaped(&token_buffer, parser);
11514 }
11515
11516 break;
11517 }
11518
11519 token_buffer.cursor = parser->current.end;
11520 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11521 continue;
11522 }
11523
11524 // If we hit a #, then we will attempt to lex interpolation.
11525 if (*breakpoint == '#') {
11526 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11527
11528 if (!type) {
11529 // If we haven't returned at this point then we had something
11530 // that looked like an interpolated class or instance variable
11531 // like "#@" but wasn't actually. In this case we'll just skip
11532 // to the next breakpoint.
11533 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11534 continue;
11535 }
11536
11537 if (type == PM_TOKEN_STRING_CONTENT) {
11538 pm_token_buffer_flush(parser, &token_buffer);
11539 }
11540
11541 LEX(type);
11542 }
11543
11544 // If we've hit the incrementor, then we need to skip past it
11545 // and find the next breakpoint.
11546 assert(*breakpoint == lex_mode->as.list.incrementor);
11547 parser->current.end = breakpoint + 1;
11548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11549 lex_mode->as.list.nesting++;
11550 continue;
11551 }
11552
11553 if (parser->current.end > parser->current.start) {
11554 pm_token_buffer_flush(parser, &token_buffer);
11555 LEX(PM_TOKEN_STRING_CONTENT);
11556 }
11557
11558 // If we were unable to find a breakpoint, then this token hits the
11559 // end of the file.
11560 parser->current.end = parser->end;
11561 pm_token_buffer_flush(parser, &token_buffer);
11562 LEX(PM_TOKEN_STRING_CONTENT);
11563 }
11564 case PM_LEX_REGEXP: {
11565 // First, we'll set the start of this token to be the current end.
11566 if (parser->next_start == NULL) {
11567 parser->current.start = parser->current.end;
11568 } else {
11569 parser->current.start = parser->next_start;
11570 parser->current.end = parser->next_start;
11571 parser->next_start = NULL;
11572 }
11573
11574 // We'll check if we're at the end of the file. If we are, then we
11575 // need to return the EOF token.
11576 if (parser->current.end >= parser->end) {
11577 LEX(PM_TOKEN_EOF);
11578 }
11579
11580 // Get a reference to the current mode.
11581 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11582
11583 // These are the places where we need to split up the content of the
11584 // regular expression. We'll use strpbrk to find the first of these
11585 // characters.
11586 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11587 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11588 pm_regexp_token_buffer_t token_buffer = { 0 };
11589
11590 while (breakpoint != NULL) {
11591 uint8_t term = lex_mode->as.regexp.terminator;
11592 bool is_terminator = (*breakpoint == term);
11593
11594 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
11595 // For example: `%r\nfoo\r\n`
11596 // The regular expression should be /foo/, not /foo\r/
11597 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11598 if (term == '\n') {
11599 is_terminator = true;
11600 }
11601
11602 // If the terminator is a CR, but we see a CRLF, we need to
11603 // treat the CRLF as a newline, meaning this is _not_ the
11604 // terminator
11605 if (term == '\r') {
11606 is_terminator = false;
11607 }
11608 }
11609
11610 // If we hit the terminator, we need to determine what kind of
11611 // token to return.
11612 if (is_terminator) {
11613 if (lex_mode->as.regexp.nesting > 0) {
11614 parser->current.end = breakpoint + 1;
11615 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11616 lex_mode->as.regexp.nesting--;
11617 continue;
11618 }
11619
11620 // Here we've hit the terminator. If we have already consumed
11621 // content then we need to return that content as string content
11622 // first.
11623 if (breakpoint > parser->current.start) {
11624 parser->current.end = breakpoint;
11625 pm_regexp_token_buffer_flush(parser, &token_buffer);
11626 LEX(PM_TOKEN_STRING_CONTENT);
11627 }
11628
11629 // Check here if we need to track the newline.
11630 size_t eol_length = match_eol_at(parser, breakpoint);
11631 if (eol_length) {
11632 parser->current.end = breakpoint + eol_length;
11633
11634 // Track the newline if we're not in a heredoc that
11635 // would have already added the newline to the
11636 // list.
11637 if (parser->heredoc_end == NULL) {
11638 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11639 }
11640 } else {
11641 parser->current.end = breakpoint + 1;
11642 }
11643
11644 // Since we've hit the terminator of the regular expression,
11645 // we now need to parse the options.
11646 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11647
11648 lex_mode_pop(parser);
11649 lex_state_set(parser, PM_LEX_STATE_END);
11650 LEX(PM_TOKEN_REGEXP_END);
11651 }
11652
11653 // If we've hit the incrementor, then we need to skip past it
11654 // and find the next breakpoint.
11655 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11656 parser->current.end = breakpoint + 1;
11657 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11658 lex_mode->as.regexp.nesting++;
11659 continue;
11660 }
11661
11662 switch (*breakpoint) {
11663 case '\0':
11664 // If we hit a null byte, skip directly past it.
11665 parser->current.end = breakpoint + 1;
11666 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11667 break;
11668 case '\r':
11669 if (peek_at(parser, breakpoint + 1) != '\n') {
11670 parser->current.end = breakpoint + 1;
11671 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11672 break;
11673 }
11674
11675 breakpoint++;
11676 parser->current.end = breakpoint;
11677 pm_regexp_token_buffer_escape(parser, &token_buffer);
11678 token_buffer.base.cursor = breakpoint;
11679
11681 case '\n':
11682 // If we've hit a newline, then we need to track that in
11683 // the list of newlines.
11684 if (parser->heredoc_end == NULL) {
11685 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11686 parser->current.end = breakpoint + 1;
11687 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11688 break;
11689 }
11690
11691 parser->current.end = breakpoint + 1;
11692 parser_flush_heredoc_end(parser);
11693 pm_regexp_token_buffer_flush(parser, &token_buffer);
11694 LEX(PM_TOKEN_STRING_CONTENT);
11695 case '\\': {
11696 // If we hit escapes, then we need to treat the next
11697 // token literally. In this case we'll skip past the
11698 // next character and find the next breakpoint.
11699 parser->current.end = breakpoint + 1;
11700
11701 // If we've hit the end of the file, then break out of
11702 // the loop by setting the breakpoint to NULL.
11703 if (parser->current.end == parser->end) {
11704 breakpoint = NULL;
11705 break;
11706 }
11707
11708 pm_regexp_token_buffer_escape(parser, &token_buffer);
11709 uint8_t peeked = peek(parser);
11710
11711 switch (peeked) {
11712 case '\r':
11713 parser->current.end++;
11714 if (peek(parser) != '\n') {
11715 if (lex_mode->as.regexp.terminator != '\r') {
11716 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11717 }
11718 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11719 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11720 break;
11721 }
11723 case '\n':
11724 if (parser->heredoc_end) {
11725 // ... if we are on the same line as a heredoc,
11726 // flush the heredoc and continue parsing after
11727 // heredoc_end.
11728 parser_flush_heredoc_end(parser);
11729 pm_regexp_token_buffer_copy(parser, &token_buffer);
11730 LEX(PM_TOKEN_STRING_CONTENT);
11731 } else {
11732 // ... else track the newline.
11733 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11734 }
11735
11736 parser->current.end++;
11737 break;
11738 case 'c':
11739 case 'C':
11740 case 'M':
11741 case 'u':
11742 case 'x':
11743 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11744 break;
11745 default:
11746 if (lex_mode->as.regexp.terminator == peeked) {
11747 // Some characters when they are used as the
11748 // terminator also receive an escape. They are
11749 // enumerated here.
11750 switch (peeked) {
11751 case '$': case ')': case '*': case '+':
11752 case '.': case '>': case '?': case ']':
11753 case '^': case '|': case '}':
11754 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11755 break;
11756 default:
11757 break;
11758 }
11759
11760 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11761 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11762 parser->current.end++;
11763 break;
11764 }
11765
11766 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11767 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11768 break;
11769 }
11770
11771 token_buffer.base.cursor = parser->current.end;
11772 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11773 break;
11774 }
11775 case '#': {
11776 // If we hit a #, then we will attempt to lex
11777 // interpolation.
11778 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11779
11780 if (!type) {
11781 // If we haven't returned at this point then we had
11782 // something that looked like an interpolated class or
11783 // instance variable like "#@" but wasn't actually. In
11784 // this case we'll just skip to the next breakpoint.
11785 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11786 break;
11787 }
11788
11789 if (type == PM_TOKEN_STRING_CONTENT) {
11790 pm_regexp_token_buffer_flush(parser, &token_buffer);
11791 }
11792
11793 LEX(type);
11794 }
11795 default:
11796 assert(false && "unreachable");
11797 break;
11798 }
11799 }
11800
11801 if (parser->current.end > parser->current.start) {
11802 pm_regexp_token_buffer_flush(parser, &token_buffer);
11803 LEX(PM_TOKEN_STRING_CONTENT);
11804 }
11805
11806 // If we were unable to find a breakpoint, then this token hits the
11807 // end of the file.
11808 parser->current.end = parser->end;
11809 pm_regexp_token_buffer_flush(parser, &token_buffer);
11810 LEX(PM_TOKEN_STRING_CONTENT);
11811 }
11812 case PM_LEX_STRING: {
11813 // First, we'll set the start of this token to be the current end.
11814 if (parser->next_start == NULL) {
11815 parser->current.start = parser->current.end;
11816 } else {
11817 parser->current.start = parser->next_start;
11818 parser->current.end = parser->next_start;
11819 parser->next_start = NULL;
11820 }
11821
11822 // We'll check if we're at the end of the file. If we are, then we need to
11823 // return the EOF token.
11824 if (parser->current.end >= parser->end) {
11825 LEX(PM_TOKEN_EOF);
11826 }
11827
11828 // These are the places where we need to split up the content of the
11829 // string. We'll use strpbrk to find the first of these characters.
11830 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11831 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11832 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11833
11834 // If we haven't found an escape yet, then this buffer will be
11835 // unallocated since we can refer directly to the source string.
11836 pm_token_buffer_t token_buffer = { 0 };
11837
11838 while (breakpoint != NULL) {
11839 // If we hit the incrementor, then we'll increment the nesting and
11840 // continue lexing.
11841 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11842 lex_mode->as.string.nesting++;
11843 parser->current.end = breakpoint + 1;
11844 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11845 continue;
11846 }
11847
11848 uint8_t term = lex_mode->as.string.terminator;
11849 bool is_terminator = (*breakpoint == term);
11850
11851 // If the terminator is a newline, we need to consider \r\n _also_ a newline.
11852 // For example: `%\nfoo\r\n`
11853 // The string should be "foo", not "foo\r"
11854 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11855 if (term == '\n') {
11856 is_terminator = true;
11857 }
11858
11859 // If the terminator is a CR, but we see a CRLF, we need to
11860 // treat the CRLF as a newline, meaning this is _not_ the
11861 // terminator
11862 if (term == '\r') {
11863 is_terminator = false;
11864 }
11865 }
11866
11867 // Note that we have to check the terminator here first because we could
11868 // potentially be parsing a % string that has a # character as the
11869 // terminator.
11870 if (is_terminator) {
11871 // If this terminator doesn't actually close the string, then we need
11872 // to continue on past it.
11873 if (lex_mode->as.string.nesting > 0) {
11874 parser->current.end = breakpoint + 1;
11875 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11876 lex_mode->as.string.nesting--;
11877 continue;
11878 }
11879
11880 // Here we've hit the terminator. If we have already consumed content
11881 // then we need to return that content as string content first.
11882 if (breakpoint > parser->current.start) {
11883 parser->current.end = breakpoint;
11884 pm_token_buffer_flush(parser, &token_buffer);
11885 LEX(PM_TOKEN_STRING_CONTENT);
11886 }
11887
11888 // Otherwise we need to switch back to the parent lex mode and
11889 // return the end of the string.
11890 size_t eol_length = match_eol_at(parser, breakpoint);
11891 if (eol_length) {
11892 parser->current.end = breakpoint + eol_length;
11893
11894 // Track the newline if we're not in a heredoc that
11895 // would have already added the newline to the
11896 // list.
11897 if (parser->heredoc_end == NULL) {
11898 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11899 }
11900 } else {
11901 parser->current.end = breakpoint + 1;
11902 }
11903
11904 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11905 parser->current.end++;
11906 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11907 lex_mode_pop(parser);
11908 LEX(PM_TOKEN_LABEL_END);
11909 }
11910
11911 // When the delimiter itself is a newline, we won't
11912 // get a chance to flush heredocs in the usual places since
11913 // the newline is already consumed.
11914 if (term == '\n' && parser->heredoc_end) {
11915 parser_flush_heredoc_end(parser);
11916 }
11917
11918 lex_state_set(parser, PM_LEX_STATE_END);
11919 lex_mode_pop(parser);
11920 LEX(PM_TOKEN_STRING_END);
11921 }
11922
11923 switch (*breakpoint) {
11924 case '\0':
11925 // Skip directly past the null character.
11926 parser->current.end = breakpoint + 1;
11927 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11928 break;
11929 case '\r':
11930 if (peek_at(parser, breakpoint + 1) != '\n') {
11931 parser->current.end = breakpoint + 1;
11932 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11933 break;
11934 }
11935
11936 // If we hit a \r\n sequence, then we need to treat it
11937 // as a newline.
11938 breakpoint++;
11939 parser->current.end = breakpoint;
11940 pm_token_buffer_escape(parser, &token_buffer);
11941 token_buffer.cursor = breakpoint;
11942
11944 case '\n':
11945 // When we hit a newline, we need to flush any potential
11946 // heredocs. Note that this has to happen after we check
11947 // for the terminator in case the terminator is a
11948 // newline character.
11949 if (parser->heredoc_end == NULL) {
11950 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11951 parser->current.end = breakpoint + 1;
11952 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11953 break;
11954 }
11955
11956 parser->current.end = breakpoint + 1;
11957 parser_flush_heredoc_end(parser);
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 case '\\': {
11961 // Here we hit escapes.
11962 parser->current.end = breakpoint + 1;
11963
11964 // If we've hit the end of the file, then break out of
11965 // the loop by setting the breakpoint to NULL.
11966 if (parser->current.end == parser->end) {
11967 breakpoint = NULL;
11968 continue;
11969 }
11970
11971 pm_token_buffer_escape(parser, &token_buffer);
11972 uint8_t peeked = peek(parser);
11973
11974 switch (peeked) {
11975 case '\\':
11976 pm_token_buffer_push_byte(&token_buffer, '\\');
11977 parser->current.end++;
11978 break;
11979 case '\r':
11980 parser->current.end++;
11981 if (peek(parser) != '\n') {
11982 if (!lex_mode->as.string.interpolation) {
11983 pm_token_buffer_push_byte(&token_buffer, '\\');
11984 }
11985 pm_token_buffer_push_byte(&token_buffer, '\r');
11986 break;
11987 }
11989 case '\n':
11990 if (!lex_mode->as.string.interpolation) {
11991 pm_token_buffer_push_byte(&token_buffer, '\\');
11992 pm_token_buffer_push_byte(&token_buffer, '\n');
11993 }
11994
11995 if (parser->heredoc_end) {
11996 // ... if we are on the same line as a heredoc,
11997 // flush the heredoc and continue parsing after
11998 // heredoc_end.
11999 parser_flush_heredoc_end(parser);
12000 pm_token_buffer_copy(parser, &token_buffer);
12001 LEX(PM_TOKEN_STRING_CONTENT);
12002 } else {
12003 // ... else track the newline.
12004 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
12005 }
12006
12007 parser->current.end++;
12008 break;
12009 default:
12010 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12011 pm_token_buffer_push_byte(&token_buffer, peeked);
12012 parser->current.end++;
12013 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12014 pm_token_buffer_push_byte(&token_buffer, peeked);
12015 parser->current.end++;
12016 } else if (lex_mode->as.string.interpolation) {
12017 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12018 } else {
12019 pm_token_buffer_push_byte(&token_buffer, '\\');
12020 pm_token_buffer_push_escaped(&token_buffer, parser);
12021 }
12022
12023 break;
12024 }
12025
12026 token_buffer.cursor = parser->current.end;
12027 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12028 break;
12029 }
12030 case '#': {
12031 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12032
12033 if (!type) {
12034 // If we haven't returned at this point then we had something that
12035 // looked like an interpolated class or instance variable like "#@"
12036 // but wasn't actually. In this case we'll just skip to the next
12037 // breakpoint.
12038 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12039 break;
12040 }
12041
12042 if (type == PM_TOKEN_STRING_CONTENT) {
12043 pm_token_buffer_flush(parser, &token_buffer);
12044 }
12045
12046 LEX(type);
12047 }
12048 default:
12049 assert(false && "unreachable");
12050 }
12051 }
12052
12053 if (parser->current.end > parser->current.start) {
12054 pm_token_buffer_flush(parser, &token_buffer);
12055 LEX(PM_TOKEN_STRING_CONTENT);
12056 }
12057
12058 // If we've hit the end of the string, then this is an unterminated
12059 // string. In that case we'll return a string content token.
12060 parser->current.end = parser->end;
12061 pm_token_buffer_flush(parser, &token_buffer);
12062 LEX(PM_TOKEN_STRING_CONTENT);
12063 }
12064 case PM_LEX_HEREDOC: {
12065 // First, we'll set the start of this token.
12066 if (parser->next_start == NULL) {
12067 parser->current.start = parser->current.end;
12068 } else {
12069 parser->current.start = parser->next_start;
12070 parser->current.end = parser->next_start;
12071 parser->heredoc_end = NULL;
12072 parser->next_start = NULL;
12073 }
12074
12075 // Now let's grab the information about the identifier off of the
12076 // current lex mode.
12077 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12078 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12079
12080 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12081 lex_mode->as.heredoc.line_continuation = false;
12082
12083 // We'll check if we're at the end of the file. If we are, then we
12084 // will add an error (because we weren't able to find the
12085 // terminator) but still continue parsing so that content after the
12086 // declaration of the heredoc can be parsed.
12087 if (parser->current.end >= parser->end) {
12088 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12089 parser->next_start = lex_mode->as.heredoc.next_start;
12090 parser->heredoc_end = parser->current.end;
12091 lex_state_set(parser, PM_LEX_STATE_END);
12092 lex_mode_pop(parser);
12093 LEX(PM_TOKEN_HEREDOC_END);
12094 }
12095
12096 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12097 size_t ident_length = heredoc_lex_mode->ident_length;
12098
12099 // If we are immediately following a newline and we have hit the
12100 // terminator, then we need to return the ending of the heredoc.
12101 if (current_token_starts_line(parser)) {
12102 const uint8_t *start = parser->current.start;
12103
12104 if (!line_continuation && (start + ident_length <= parser->end)) {
12105 const uint8_t *newline = next_newline(start, parser->end - start);
12106 const uint8_t *ident_end = newline;
12107 const uint8_t *terminator_end = newline;
12108
12109 if (newline == NULL) {
12110 terminator_end = parser->end;
12111 ident_end = parser->end;
12112 } else {
12113 terminator_end++;
12114 if (newline[-1] == '\r') {
12115 ident_end--; // Remove \r
12116 }
12117 }
12118
12119 const uint8_t *terminator_start = ident_end - ident_length;
12120 const uint8_t *cursor = start;
12121
12122 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12123 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12124 cursor++;
12125 }
12126 }
12127
12128 if (
12129 (cursor == terminator_start) &&
12130 (memcmp(terminator_start, ident_start, ident_length) == 0)
12131 ) {
12132 if (newline != NULL) {
12133 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
12134 }
12135
12136 parser->current.end = terminator_end;
12137 if (*lex_mode->as.heredoc.next_start == '\\') {
12138 parser->next_start = NULL;
12139 } else {
12140 parser->next_start = lex_mode->as.heredoc.next_start;
12141 parser->heredoc_end = parser->current.end;
12142 }
12143
12144 lex_state_set(parser, PM_LEX_STATE_END);
12145 lex_mode_pop(parser);
12146 LEX(PM_TOKEN_HEREDOC_END);
12147 }
12148 }
12149
12150 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12151 if (
12152 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12153 lex_mode->as.heredoc.common_whitespace != NULL &&
12154 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12155 peek_at(parser, start) != '\n'
12156 ) {
12157 *lex_mode->as.heredoc.common_whitespace = whitespace;
12158 }
12159 }
12160
12161 // Otherwise we'll be parsing string content. These are the places
12162 // where we need to split up the content of the heredoc. We'll use
12163 // strpbrk to find the first of these characters.
12164 uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
12165
12166 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12167 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12168 breakpoints[3] = '\0';
12169 }
12170
12171 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12172 pm_token_buffer_t token_buffer = { 0 };
12173 bool was_line_continuation = false;
12174
12175 while (breakpoint != NULL) {
12176 switch (*breakpoint) {
12177 case '\0':
12178 // Skip directly past the null character.
12179 parser->current.end = breakpoint + 1;
12180 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12181 break;
12182 case '\r':
12183 parser->current.end = breakpoint + 1;
12184
12185 if (peek_at(parser, breakpoint + 1) != '\n') {
12186 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12187 break;
12188 }
12189
12190 // If we hit a \r\n sequence, then we want to replace it
12191 // with a single \n character in the final string.
12192 breakpoint++;
12193 pm_token_buffer_escape(parser, &token_buffer);
12194 token_buffer.cursor = breakpoint;
12195
12197 case '\n': {
12198 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12199 parser_flush_heredoc_end(parser);
12200 parser->current.end = breakpoint + 1;
12201 pm_token_buffer_flush(parser, &token_buffer);
12202 LEX(PM_TOKEN_STRING_CONTENT);
12203 }
12204
12205 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
12206
12207 // If we have a - or ~ heredoc, then we can match after
12208 // some leading whitespace.
12209 const uint8_t *start = breakpoint + 1;
12210
12211 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12212 // We want to match the terminator starting from the end of the line in case
12213 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12214 const uint8_t *newline = next_newline(start, parser->end - start);
12215
12216 if (newline == NULL) {
12217 newline = parser->end;
12218 } else if (newline[-1] == '\r') {
12219 newline--; // Remove \r
12220 }
12221
12222 // Start of a possible terminator.
12223 const uint8_t *terminator_start = newline - ident_length;
12224
12225 // Cursor to check for the leading whitespace. We skip the
12226 // leading whitespace if we have a - or ~ heredoc.
12227 const uint8_t *cursor = start;
12228
12229 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12230 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12231 cursor++;
12232 }
12233 }
12234
12235 if (
12236 cursor == terminator_start &&
12237 (memcmp(terminator_start, ident_start, ident_length) == 0)
12238 ) {
12239 parser->current.end = breakpoint + 1;
12240 pm_token_buffer_flush(parser, &token_buffer);
12241 LEX(PM_TOKEN_STRING_CONTENT);
12242 }
12243 }
12244
12245 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12246
12247 // If we have hit a newline that is followed by a valid
12248 // terminator, then we need to return the content of the
12249 // heredoc here as string content. Then, the next time a
12250 // token is lexed, it will match again and return the
12251 // end of the heredoc.
12252 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12253 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12254 *lex_mode->as.heredoc.common_whitespace = whitespace;
12255 }
12256
12257 parser->current.end = breakpoint + 1;
12258 pm_token_buffer_flush(parser, &token_buffer);
12259 LEX(PM_TOKEN_STRING_CONTENT);
12260 }
12261
12262 // Otherwise we hit a newline and it wasn't followed by
12263 // a terminator, so we can continue parsing.
12264 parser->current.end = breakpoint + 1;
12265 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12266 break;
12267 }
12268 case '\\': {
12269 // If we hit an escape, then we need to skip past
12270 // however many characters the escape takes up. However
12271 // it's important that if \n or \r\n are escaped, we
12272 // stop looping before the newline and not after the
12273 // newline so that we can still potentially find the
12274 // terminator of the heredoc.
12275 parser->current.end = breakpoint + 1;
12276
12277 // If we've hit the end of the file, then break out of
12278 // the loop by setting the breakpoint to NULL.
12279 if (parser->current.end == parser->end) {
12280 breakpoint = NULL;
12281 continue;
12282 }
12283
12284 pm_token_buffer_escape(parser, &token_buffer);
12285 uint8_t peeked = peek(parser);
12286
12287 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12288 switch (peeked) {
12289 case '\r':
12290 parser->current.end++;
12291 if (peek(parser) != '\n') {
12292 pm_token_buffer_push_byte(&token_buffer, '\\');
12293 pm_token_buffer_push_byte(&token_buffer, '\r');
12294 break;
12295 }
12297 case '\n':
12298 pm_token_buffer_push_byte(&token_buffer, '\\');
12299 pm_token_buffer_push_byte(&token_buffer, '\n');
12300 token_buffer.cursor = parser->current.end + 1;
12301 breakpoint = parser->current.end;
12302 continue;
12303 default:
12304 pm_token_buffer_push_byte(&token_buffer, '\\');
12305 pm_token_buffer_push_escaped(&token_buffer, parser);
12306 break;
12307 }
12308 } else {
12309 switch (peeked) {
12310 case '\r':
12311 parser->current.end++;
12312 if (peek(parser) != '\n') {
12313 pm_token_buffer_push_byte(&token_buffer, '\r');
12314 break;
12315 }
12317 case '\n':
12318 // If we are in a tilde here, we should
12319 // break out of the loop and return the
12320 // string content.
12321 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12322 const uint8_t *end = parser->current.end;
12323
12324 if (parser->heredoc_end == NULL) {
12325 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
12326 }
12327
12328 // Here we want the buffer to only
12329 // include up to the backslash.
12330 parser->current.end = breakpoint;
12331 pm_token_buffer_flush(parser, &token_buffer);
12332
12333 // Now we can advance the end of the
12334 // token past the newline.
12335 parser->current.end = end + 1;
12336 lex_mode->as.heredoc.line_continuation = true;
12337 LEX(PM_TOKEN_STRING_CONTENT);
12338 }
12339
12340 was_line_continuation = true;
12341 token_buffer.cursor = parser->current.end + 1;
12342 breakpoint = parser->current.end;
12343 continue;
12344 default:
12345 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12346 break;
12347 }
12348 }
12349
12350 token_buffer.cursor = parser->current.end;
12351 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12352 break;
12353 }
12354 case '#': {
12355 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12356
12357 if (!type) {
12358 // If we haven't returned at this point then we had
12359 // something that looked like an interpolated class
12360 // or instance variable like "#@" but wasn't
12361 // actually. In this case we'll just skip to the
12362 // next breakpoint.
12363 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12364 break;
12365 }
12366
12367 if (type == PM_TOKEN_STRING_CONTENT) {
12368 pm_token_buffer_flush(parser, &token_buffer);
12369 }
12370
12371 LEX(type);
12372 }
12373 default:
12374 assert(false && "unreachable");
12375 }
12376
12377 was_line_continuation = false;
12378 }
12379
12380 if (parser->current.end > parser->current.start) {
12381 parser->current.end = parser->end;
12382 pm_token_buffer_flush(parser, &token_buffer);
12383 LEX(PM_TOKEN_STRING_CONTENT);
12384 }
12385
12386 // If we've hit the end of the string, then this is an unterminated
12387 // heredoc. In that case we'll return a string content token.
12388 parser->current.end = parser->end;
12389 pm_token_buffer_flush(parser, &token_buffer);
12390 LEX(PM_TOKEN_STRING_CONTENT);
12391 }
12392 }
12393
12394 assert(false && "unreachable");
12395}
12396
12397#undef LEX
12398
12399/******************************************************************************/
12400/* Parse functions */
12401/******************************************************************************/
12402
/**
 * The binding powers that can be assigned to a token. Each named level is an
 * even number, which leaves the odd value in between free for the
 * `precedence + 1` right binding power that the associativity macros used to
 * build pm_binding_powers produce. Going by the operator annotations, the
 * levels run from the loosest-binding constructs (statements, modifiers) up to
 * the tightest-binding ones (indexing, method calls).
 */
typedef enum {
    /** Used to indicate this token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,

    PM_BINDING_POWER_STATEMENT = 2,

    /** rescue */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    /** if unless until while */
    PM_BINDING_POWER_MODIFIER = 6,

    /** and or */
    PM_BINDING_POWER_COMPOSITION = 8,

    /** not */
    PM_BINDING_POWER_NOT = 10,

    /** => in */
    PM_BINDING_POWER_MATCH = 12,

    /** defined? */
    PM_BINDING_POWER_DEFINED = 14,

    /** = */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,

    /** ?: */
    PM_BINDING_POWER_TERNARY = 20,

    /** .. ... */
    PM_BINDING_POWER_RANGE = 22,

    /** || */
    PM_BINDING_POWER_LOGICAL_OR = 24,

    /** && */
    PM_BINDING_POWER_LOGICAL_AND = 26,

    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,

    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,

    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,

    /** & */
    PM_BINDING_POWER_BITWISE_AND = 34,

    /** << >> */
    PM_BINDING_POWER_SHIFT = 36,

    /** + - */
    PM_BINDING_POWER_TERM = 38,

    /** * / % */
    PM_BINDING_POWER_FACTOR = 40,

    /** -@ */
    PM_BINDING_POWER_UMINUS = 42,

    /** ** */
    PM_BINDING_POWER_EXPONENT = 44,

    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,

    /** [] []= */
    PM_BINDING_POWER_INDEX = 48,

    /** :: . */
    PM_BINDING_POWER_CALL = 50,

    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12440
12445typedef struct {
12447 pm_binding_power_t left;
12448
12450 pm_binding_power_t right;
12451
12454
12461
12462#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
12463#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
12464#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
12465#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
12466#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
12467
12468pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12469 // rescue
12470 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12471
12472 // if unless until while
12473 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12474 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12475 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12476 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12477
12478 // and or
12479 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12480 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12481
12482 // => in
12483 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12484 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12485
12486 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12487 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12488 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12489 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12490 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12491 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12492 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12493 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12494 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12495 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12496 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12497 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12498 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12499 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12500 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12501
12502 // ?:
12503 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12504
12505 // .. ...
12506 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12507 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12508 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12509 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12510
12511 // ||
12512 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12513
12514 // &&
12515 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12516
12517 // != !~ == === =~ <=>
12518 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12519 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12520 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12521 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12522 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12523 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12524
12525 // > >= < <=
12526 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12527 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12528 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12529 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12530
12531 // ^ |
12532 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12533 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12534
12535 // &
12536 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12537
12538 // >> <<
12539 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12540 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12541
12542 // - +
12543 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12544 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12545
12546 // % / *
12547 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12548 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12549 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12550 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12551
12552 // -@
12553 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12554 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12555
12556 // **
12557 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12558 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12559
12560 // ! ~ +@
12561 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12562 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12563 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12564
12565 // [
12566 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12567
12568 // :: . &.
12569 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12570 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12571 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12572};
12573
12574#undef BINDING_POWER_ASSIGNMENT
12575#undef LEFT_ASSOCIATIVE
12576#undef RIGHT_ASSOCIATIVE
12577#undef RIGHT_ASSOCIATIVE_UNARY
12578
12582static PRISM_INLINE bool
12583match1(const pm_parser_t *parser, pm_token_type_t type) {
12584 return parser->current.type == type;
12585}
12586
12590static PRISM_INLINE bool
12591match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12592 return match1(parser, type1) || match1(parser, type2);
12593}
12594
12598static PRISM_INLINE bool
12599match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12600 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12601}
12602
12606static PRISM_INLINE bool
12607match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12608 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12609}
12610
12614static PRISM_INLINE bool
12615match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
12616 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
12617}
12618
12622static PRISM_INLINE bool
12623match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12624 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12625}
12626
12630static PRISM_INLINE bool
12631match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12632 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12633}
12634
12641static bool
12642accept1(pm_parser_t *parser, pm_token_type_t type) {
12643 if (match1(parser, type)) {
12644 parser_lex(parser);
12645 return true;
12646 }
12647 return false;
12648}
12649
12654static PRISM_INLINE bool
12655accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12656 if (match2(parser, type1, type2)) {
12657 parser_lex(parser);
12658 return true;
12659 }
12660 return false;
12661}
12662
12674static void
12675expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12676 if (accept1(parser, type)) return;
12677
12678 const uint8_t *location = parser->previous.end;
12679 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12680
12681 parser->previous.start = location;
12682 parser->previous.type = 0;
12683}
12684
12689static void
12690expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12691 if (accept2(parser, type1, type2)) return;
12692
12693 const uint8_t *location = parser->previous.end;
12694 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12695
12696 parser->previous.start = location;
12697 parser->previous.type = 0;
12698}
12699
12704static void
12705expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12706 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12707 parser_lex(parser);
12708 } else {
12709 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12710 parser->previous.start = parser->previous.end;
12711 parser->previous.type = 0;
12712 }
12713}
12714
12721static void
12722expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12723 if (accept1(parser, type)) return;
12724
12725 const uint8_t *start = opening->start;
12726 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12727
12728 parser->previous.start = parser->previous.end;
12729 parser->previous.type = 0;
12730}
12731
/*
 * Single-bit flags that are combined into the uint8_t `flags` argument taken
 * by the parse_* functions below.
 * NOTE(review): the meaning of each flag is only suggested by its name here;
 * confirm against the code that tests them.
 */
#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) (1u << 0))
#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) (1u << 1))
#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) (1u << 2))
#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) (1u << 3))
12737
12738static pm_node_t *
12739parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
12740
12745static pm_node_t *
12746parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12747 pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
12748 pm_assert_value_expression(parser, node);
12749 return node;
12750}
12751
12770static PRISM_INLINE bool
12771token_begins_expression_p(pm_token_type_t type) {
12772 switch (type) {
12773 case PM_TOKEN_EQUAL_GREATER:
12774 case PM_TOKEN_KEYWORD_IN:
12775 // We need to special case this because it is a binary operator that
12776 // should not be marked as beginning an expression.
12777 return false;
12778 case PM_TOKEN_BRACE_RIGHT:
12779 case PM_TOKEN_BRACKET_RIGHT:
12780 case PM_TOKEN_COLON:
12781 case PM_TOKEN_COMMA:
12782 case PM_TOKEN_EMBEXPR_END:
12783 case PM_TOKEN_EOF:
12784 case PM_TOKEN_LAMBDA_BEGIN:
12785 case PM_TOKEN_KEYWORD_DO:
12786 case PM_TOKEN_KEYWORD_DO_BLOCK:
12787 case PM_TOKEN_KEYWORD_DO_LOOP:
12788 case PM_TOKEN_KEYWORD_END:
12789 case PM_TOKEN_KEYWORD_ELSE:
12790 case PM_TOKEN_KEYWORD_ELSIF:
12791 case PM_TOKEN_KEYWORD_ENSURE:
12792 case PM_TOKEN_KEYWORD_THEN:
12793 case PM_TOKEN_KEYWORD_RESCUE:
12794 case PM_TOKEN_KEYWORD_WHEN:
12795 case PM_TOKEN_NEWLINE:
12796 case PM_TOKEN_PARENTHESIS_RIGHT:
12797 case PM_TOKEN_SEMICOLON:
12798 // The reason we need this short-circuit is because we're using the
12799 // binding powers table to tell us if the subsequent token could
12800 // potentially be the start of an expression. If there _is_ a binding
12801 // power for one of these tokens, then we should remove it from this list
12802 // and let it be handled by the default case below.
12803 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12804 return false;
12805 case PM_TOKEN_UAMPERSAND:
12806 // This is a special case because this unary operator cannot appear
12807 // as a general operator, it only appears in certain circumstances.
12808 return false;
12809 case PM_TOKEN_UCOLON_COLON:
12810 case PM_TOKEN_UMINUS:
12811 case PM_TOKEN_UMINUS_NUM:
12812 case PM_TOKEN_UPLUS:
12813 case PM_TOKEN_BANG:
12814 case PM_TOKEN_TILDE:
12815 case PM_TOKEN_UDOT_DOT:
12816 case PM_TOKEN_UDOT_DOT_DOT:
12817 // These unary tokens actually do have binding power associated with them
12818 // so that we can correctly place them into the precedence order. But we
12819 // want them to be marked as beginning an expression, so we need to
12820 // special case them here.
12821 return true;
12822 default:
12823 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12824 }
12825}
12826
12831static pm_node_t *
12832parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12833 if (accept1(parser, PM_TOKEN_USTAR)) {
12834 pm_token_t operator = parser->previous;
12835 pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12836 return UP(pm_splat_node_create(parser, &operator, expression));
12837 }
12838
12839 return parse_value_expression(parser, binding_power, flags, diag_id, depth);
12840}
12841
12842static bool
12843pm_node_unreference_each(const pm_node_t *node, void *data) {
12844 switch (PM_NODE_TYPE(node)) {
12845 /* When we are about to destroy a set of nodes that could potentially
12846 * contain block exits for the current scope, we need to check if they
12847 * are contained in the list of block exits and remove them if they are.
12848 */
12849 case PM_BREAK_NODE:
12850 case PM_NEXT_NODE:
12851 case PM_REDO_NODE: {
12852 pm_parser_t *parser = (pm_parser_t *) data;
12853 size_t index = 0;
12854
12855 while (index < parser->current_block_exits->size) {
12856 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12857
12858 if (block_exit == node) {
12859 if (index + 1 < parser->current_block_exits->size) {
12860 memmove(
12861 &parser->current_block_exits->nodes[index],
12862 &parser->current_block_exits->nodes[index + 1],
12863 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12864 );
12865 }
12866 parser->current_block_exits->size--;
12867
12868 /* Note returning true here because these nodes could have
12869 * arguments that are themselves block exits. */
12870 return true;
12871 }
12872
12873 index++;
12874 }
12875
12876 return true;
12877 }
12878 /* When an implicit local variable is written to or targeted, it becomes
12879 * a regular, named local variable. This branch removes it from the list
12880 * of implicit parameters when that happens. */
12881 case PM_LOCAL_VARIABLE_READ_NODE:
12882 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12883 pm_parser_t *parser = (pm_parser_t *) data;
12884 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12885
12886 for (size_t index = 0; index < implicit_parameters->size; index++) {
12887 if (implicit_parameters->nodes[index] == node) {
12888 /* If the node is not the last one in the list, we need to
12889 * shift the remaining nodes down to fill the gap. This is
12890 * extremely unlikely to happen. */
12891 if (index != implicit_parameters->size - 1) {
12892 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12893 }
12894
12895 implicit_parameters->size--;
12896 break;
12897 }
12898 }
12899
12900 return false;
12901 }
12902 default:
12903 return true;
12904 }
12905}
12906
12912static void
12913pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12914 pm_visit_node(node, pm_node_unreference_each, parser);
12915}
12916
12921static void
12922parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12923 // The method name needs to change. If we previously had
12924 // foo, we now need foo=. In this case we'll allocate a new
12925 // owned string, copy the previous method name in, and
12926 // append an =.
12927 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12928 size_t length = constant->length;
12929 uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
12930
12931 memcpy(name, constant->start, length);
12932 name[length] = '=';
12933
12934 *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
12935}
12936
12943static pm_node_t *
12944parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12945 switch (PM_NODE_TYPE(target)) {
12946 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12947 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12948 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12949 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12950 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12951 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12952 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12953 default: break;
12954 }
12955
12956 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12957 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12958
12959 return UP(result);
12960}
12961
12970static pm_node_t *
12971parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12972 switch (PM_NODE_TYPE(target)) {
12973 case PM_ERROR_RECOVERY_NODE:
12974 return target;
12975 case PM_SOURCE_ENCODING_NODE:
12976 case PM_FALSE_NODE:
12977 case PM_SOURCE_FILE_NODE:
12978 case PM_SOURCE_LINE_NODE:
12979 case PM_NIL_NODE:
12980 case PM_SELF_NODE:
12981 case PM_TRUE_NODE: {
12982 // In these special cases, we have specific error messages and we
12983 // will replace them with local variable writes.
12984 return parse_unwriteable_target(parser, target);
12985 }
12986 case PM_CLASS_VARIABLE_READ_NODE:
12988 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12989 return target;
12990 case PM_CONSTANT_PATH_NODE:
12991 if (context_def_p(parser)) {
12992 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12993 }
12994
12996 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12997
12998 return target;
12999 case PM_CONSTANT_READ_NODE:
13000 if (context_def_p(parser)) {
13001 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13002 }
13003
13004 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13005 target->type = PM_CONSTANT_TARGET_NODE;
13006
13007 return target;
13008 case PM_BACK_REFERENCE_READ_NODE:
13009 case PM_NUMBERED_REFERENCE_READ_NODE:
13010 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13011 return UP(pm_error_recovery_node_create_unexpected(parser, target));
13012 case PM_GLOBAL_VARIABLE_READ_NODE:
13014 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13015 return target;
13016 case PM_LOCAL_VARIABLE_READ_NODE: {
13017 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13018 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
13019 pm_node_unreference(parser, target);
13020 }
13021
13022 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13023 uint32_t name = cast->name;
13024 uint32_t depth = cast->depth;
13025 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13026
13028 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13029
13030 return target;
13031 }
13032 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13033 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13034 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
13035
13036 pm_node_unreference(parser, target);
13037
13038 return node;
13039 }
13040 case PM_INSTANCE_VARIABLE_READ_NODE:
13042 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13043 return target;
13044 case PM_MULTI_TARGET_NODE:
13045 if (splat_parent) {
13046 // Multi target is not accepted in all positions. If this is one
13047 // of them, then we need to add an error.
13048 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13049 }
13050
13051 return target;
13052 case PM_SPLAT_NODE: {
13053 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13054
13055 if (splat->expression != NULL) {
13056 splat->expression = parse_target(parser, splat->expression, multiple, true);
13057 }
13058
13059 return UP(splat);
13060 }
13061 case PM_CALL_NODE: {
13062 pm_call_node_t *call = (pm_call_node_t *) target;
13063
13064 // If we have no arguments to the call node and we need this to be a
13065 // target then this is either a method call or a local variable
13066 // write.
13067 if (
13068 (call->message_loc.length > 0) &&
13069 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13070 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13071 (call->opening_loc.length == 0) &&
13072 (call->arguments == NULL) &&
13073 (call->block == NULL)
13074 ) {
13075 if (call->receiver == NULL) {
13076 // When we get here, we have a local variable write, because it
13077 // was previously marked as a method call but now we have an =.
13078 // This looks like:
13079 //
13080 // foo = 1
13081 //
13082 // When it was parsed in the prefix position, foo was seen as a
13083 // method call with no receiver and no arguments. Now we have an
13084 // =, so we know it's a local variable write.
13085 pm_location_t message_loc = call->message_loc;
13086 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
13087
13088 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
13089 }
13090
13091 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13092 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13093 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13094 }
13095
13096 parse_write_name(parser, &call->name);
13097 return UP(pm_call_target_node_create(parser, call));
13098 }
13099 }
13100
13101 // If there is no call operator and the message is "[]" then this is
13102 // an aref expression, and we can transform it into an aset
13103 // expression.
13104 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13105 return UP(pm_index_target_node_create(parser, call));
13106 }
13107 }
13109 default:
13110 // In this case we have a node that we don't know how to convert
13111 // into a target. We need to treat it as an error. For now, we'll
13112 // mark it as an error and just skip right past it.
13113 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13114 return target;
13115 }
13116}
13117
13122static pm_node_t *
13123parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13124 pm_node_t *result = parse_target(parser, target, multiple, false);
13125
13126 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13127 // parens after the targets.
13128 if (
13129 !match1(parser, PM_TOKEN_EQUAL) &&
13130 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13131 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13132 ) {
13133 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13134 }
13135
13136 return result;
13137}
13138
13143static pm_node_t *
13144parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13145 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13146
13147 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13148 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
13149 }
13150
13151 return write;
13152}
13153
13157static pm_node_t *
13158parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13159 switch (PM_NODE_TYPE(target)) {
13160 case PM_ERROR_RECOVERY_NODE:
13161 return target;
13162 case PM_CLASS_VARIABLE_READ_NODE: {
13163 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13164 return UP(node);
13165 }
13166 case PM_CONSTANT_PATH_NODE: {
13167 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
13168
13169 if (context_def_p(parser)) {
13170 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13171 }
13172
13173 return parse_shareable_constant_write(parser, node);
13174 }
13175 case PM_CONSTANT_READ_NODE: {
13176 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
13177
13178 if (context_def_p(parser)) {
13179 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13180 }
13181
13182 return parse_shareable_constant_write(parser, node);
13183 }
13184 case PM_BACK_REFERENCE_READ_NODE:
13185 case PM_NUMBERED_REFERENCE_READ_NODE:
13186 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13188 case PM_GLOBAL_VARIABLE_READ_NODE: {
13189 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13190 return UP(node);
13191 }
13192 case PM_LOCAL_VARIABLE_READ_NODE: {
13194
13195 pm_location_t location = target->location;
13196 pm_constant_id_t name = local_read->name;
13197 uint32_t depth = local_read->depth;
13198 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13199
13200 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13201 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13202 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
13203 pm_node_unreference(parser, target);
13204 }
13205
13206 pm_locals_unread(&scope->locals, name);
13207
13208 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
13209 }
13210 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13211 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13212 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
13213
13214 pm_node_unreference(parser, target);
13215
13216 return node;
13217 }
13218 case PM_INSTANCE_VARIABLE_READ_NODE: {
13219 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
13220 return write_node;
13221 }
13222 case PM_MULTI_TARGET_NODE:
13223 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
13224 case PM_SPLAT_NODE: {
13225 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13226
13227 if (splat->expression != NULL) {
13228 splat->expression = parse_write(parser, splat->expression, operator, value);
13229 }
13230
13231 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13232 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
13233
13234 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
13235 }
13236 case PM_CALL_NODE: {
13237 pm_call_node_t *call = (pm_call_node_t *) target;
13238
13239 // If we have no arguments to the call node and we need this to be a
13240 // target then this is either a method call or a local variable
13241 // write.
13242 if (
13243 (call->message_loc.length > 0) &&
13244 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13245 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13246 (call->opening_loc.length == 0) &&
13247 (call->arguments == NULL) &&
13248 (call->block == NULL)
13249 ) {
13250 if (call->receiver == NULL) {
13251 // When we get here, we have a local variable write, because it
13252 // was previously marked as a method call but now we have an =.
13253 // This looks like:
13254 //
13255 // foo = 1
13256 //
13257 // When it was parsed in the prefix position, foo was seen as a
13258 // method call with no receiver and no arguments. Now we have an
13259 // =, so we know it's a local variable write.
13260 pm_location_t message_loc = call->message_loc;
13261
13262 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
13263 pm_parser_local_add_location(parser, &message_loc, 0);
13264
13265 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
13266 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
13267
13268 return target;
13269 }
13270
13271 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13272 // When we get here, we have a method call, because it was
13273 // previously marked as a method call but now we have an =. This
13274 // looks like:
13275 //
13276 // foo.bar = 1
13277 //
13278 // When it was parsed in the prefix position, foo.bar was seen as a
13279 // method call with no arguments. Now we have an =, so we know it's
13280 // a method call with an argument. In this case we will create the
13281 // arguments node, parse the argument, and add it to the list.
13282 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13283 call->arguments = arguments;
13284
13285 pm_arguments_node_arguments_append(parser->arena, arguments, value);
13286 PM_NODE_LENGTH_SET_NODE(call, arguments);
13287 call->equal_loc = TOK2LOC(parser, operator);
13288
13289 parse_write_name(parser, &call->name);
13290 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13291
13292 return UP(call);
13293 }
13294 }
13295
13296 // If there is no call operator and the message is "[]" then this is
13297 // an aref expression, and we can transform it into an aset
13298 // expression.
13299 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13300 if (call->arguments == NULL) {
13301 call->arguments = pm_arguments_node_create(parser);
13302 }
13303
13304 pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
13305 PM_NODE_LENGTH_SET_NODE(target, value);
13306
13307 // Replace the name with "[]=".
13308 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13309 call->equal_loc = TOK2LOC(parser, operator);
13310
13311 // Ensure that the arguments for []= don't contain keywords
13312 pm_index_arguments_check(parser, call->arguments, call->block);
13313 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13314
13315 return target;
13316 }
13317
13318 // If there are arguments on the call node, then it can't be a
13319 // method call ending with = or a local variable write, so it must
13320 // be a syntax error. In this case we'll fall through to our default
13321 // handling. We need to free the value that we parsed because there
13322 // is no way for us to attach it to the tree at this point.
13323 //
13324 // Since it is possible for the value to contain an implicit
13325 // parameter somewhere in its subtree, we need to walk it and remove
13326 // any implicit parameters from the list of implicit parameters for
13327 // the current scope.
13328 pm_node_unreference(parser, value);
13329 }
13331 default:
13332 // In this case we have a node that we don't know how to convert into a
13333 // target. We need to treat it as an error. For now, we'll mark it as an
13334 // error and just skip right past it.
13335 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13336 return target;
13337 }
13338}
13339
13346static pm_node_t *
13347parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13348 switch (PM_NODE_TYPE(target)) {
13349 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13350 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13351 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13352 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13353 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13354 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13355 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13356 default: break;
13357 }
13358
13359 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
13360 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13361
13362 return UP(result);
13363}
13364
13375static pm_node_t *
13376parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13377 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13378
13379 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13380 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13381
13382 while (accept1(parser, PM_TOKEN_COMMA)) {
13383 if (accept1(parser, PM_TOKEN_USTAR)) {
13384 // Here we have a splat operator. It can have a name or be
13385 // anonymous. It can be the final target or be in the middle if
13386 // there haven't been any others yet.
13387 if (has_rest) {
13388 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13389 }
13390
13391 pm_token_t star_operator = parser->previous;
13392 pm_node_t *name = NULL;
13393
13394 if (token_begins_expression_p(parser->current.type)) {
13395 name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13396 name = parse_target(parser, name, true, true);
13397 }
13398
13399 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13400 pm_multi_target_node_targets_append(parser, result, splat);
13401 has_rest = true;
13402 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13403 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13404 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13405 target = parse_target(parser, target, true, false);
13406
13407 pm_multi_target_node_targets_append(parser, result, target);
13408 context_pop(parser);
13409 } else if (token_begins_expression_p(parser->current.type)) {
13410 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13411 target = parse_target(parser, target, true, false);
13412
13413 pm_multi_target_node_targets_append(parser, result, target);
13414 } else if (!match1(parser, PM_TOKEN_EOF)) {
13415 // If we get here, then we have a trailing , in a multi target node.
13416 // We'll add an implicit rest node to represent this.
13417 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13418 pm_multi_target_node_targets_append(parser, result, rest);
13419 break;
13420 }
13421 }
13422
13423 return UP(result);
13424}
13425
13430static pm_node_t *
13431parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13432 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13433
13434 // If we're inside parentheses, then we allow a newline before the
13435 // closing parenthesis or equals sign. Outside of parentheses, a newline
13436 // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
13437 if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
13438 accept1(parser, PM_TOKEN_NEWLINE);
13439 }
13440
13441 // Ensure that we have either an = or a ) after the targets.
13442 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13443 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13444 }
13445
13446 return result;
13447}
13448
13452static pm_statements_node_t *
13453parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13454 // First, skip past any optional terminators that might be at the beginning
13455 // of the statements.
13456 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13457
13458 // If we have a terminator, then we can just return NULL.
13459 if (context_terminator(context, &parser->current)) return NULL;
13460
13461 pm_statements_node_t *statements = pm_statements_node_create(parser);
13462
13463 // At this point we know we have at least one statement, and that it
13464 // immediately follows the current token.
13465 context_push(parser, context);
13466
13467 while (true) {
13468 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13469 pm_statements_node_body_append(parser, statements, node, true);
13470
13471 // If we're recovering from a syntax error, then we need to stop parsing
13472 // the statements now.
13473 if (parser->recovering) {
13474 // If this is the level of context where the recovery has happened,
13475 // then we can mark the parser as done recovering.
13476 if (context_terminator(context, &parser->current)) parser->recovering = false;
13477 break;
13478 }
13479
13480 // If we have a terminator, then we will parse all consecutive
13481 // terminators and then continue parsing the statements list.
13482 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13483 // If we have a terminator, then we will continue parsing the
13484 // statements list.
13485 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13486 if (context_terminator(context, &parser->current)) break;
13487
13488 // Now we can continue parsing the list of statements.
13489 continue;
13490 }
13491
13492 // At this point we have a list of statements that are not terminated by
13493 // a newline or semicolon. At this point we need to check if we're at
13494 // the end of the statements list. If we are, then we should break out
13495 // of the loop.
13496 if (context_terminator(context, &parser->current)) break;
13497
13498 // At this point, we have a syntax error, because the statement was not
13499 // terminated by a newline or semicolon, and we're not at the end of the
13500 // statements list. Ideally we should scan forward to determine if we
13501 // should insert a missing terminator or break out of parsing the
13502 // statements list at this point.
13503 //
13504 // We don't have that yet, so instead we'll do a more naive approach. If
13505 // we were unable to parse an expression, then we will skip past this
13506 // token and continue parsing the statements list. Otherwise we'll add
13507 // an error and continue parsing the statements list.
13508 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
13509 parser_lex(parser);
13510
13511 // If we are at the end of the file, then we need to stop parsing
13512 // the statements entirely at this point. Mark the parser as
13513 // recovering, as we know that EOF closes the top-level context, and
13514 // then break out of the loop.
13515 if (match1(parser, PM_TOKEN_EOF)) {
13516 parser->recovering = true;
13517 break;
13518 }
13519
13520 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13521 if (context_terminator(context, &parser->current)) break;
13522 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13523 // This is an inlined version of accept1 because the error that we
13524 // want to add has varargs. If this happens again, we should
13525 // probably extract a helper function.
13526 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
13527 parser->previous.start = parser->previous.end;
13528 parser->previous.type = 0;
13529 }
13530 }
13531
13532 context_pop(parser);
13533
13534 bool last_value = true;
13535 switch (context) {
13536 case PM_CONTEXT_BEGIN_ENSURE:
13537 case PM_CONTEXT_DEF_ENSURE:
13538 last_value = false;
13539 break;
13540 default:
13541 break;
13542 }
13543 pm_void_statements_check(parser, statements, last_value);
13544
13545 return statements;
13546}
13547
13552static void
13553pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13554 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13555
13556 if (duplicated != NULL) {
13557 pm_buffer_t buffer = { 0 };
13558 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13559
13560 pm_diagnostic_list_append_format(
13561 &parser->metadata_arena,
13562 &parser->warning_list,
13563 duplicated->location.start,
13564 duplicated->location.length,
13565 PM_WARN_DUPLICATED_HASH_KEY,
13566 (int) pm_buffer_length(&buffer),
13567 pm_buffer_value(&buffer),
13568 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13569 );
13570
13571 pm_buffer_cleanup(&buffer);
13572 }
13573}
13574
13579static void
13580pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13581 pm_node_t *previous;
13582
13583 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13584 pm_diagnostic_list_append_format(
13585 &parser->metadata_arena,
13586 &parser->warning_list,
13587 PM_NODE_START(node),
13588 PM_NODE_LENGTH(node),
13589 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13590 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13591 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13592 );
13593 }
13594}
13595
13599static bool
13600parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13601 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13602 bool contains_keyword_splat = false;
13603
13604 while (true) {
13605 pm_node_t *element;
13606
13607 switch (parser->current.type) {
13608 case PM_TOKEN_USTAR_STAR: {
13609 parser_lex(parser);
13610 pm_token_t operator = parser->previous;
13611 pm_node_t *value = NULL;
13612
13613 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13614 // If we're about to parse a nested hash that is being
13615 // pushed into this hash directly with **, then we want the
13616 // inner hash to share the static literals with the outer
13617 // hash.
13618 parser->current_hash_keys = literals;
13619 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13620 } else if (token_begins_expression_p(parser->current.type)) {
13621 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13622 } else {
13623 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13624 }
13625
13626 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13627 contains_keyword_splat = true;
13628 break;
13629 }
13630 case PM_TOKEN_LABEL: {
13631 pm_token_t label = parser->current;
13632 parser_lex(parser);
13633
13634 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13635 pm_hash_key_static_literals_add(parser, literals, key);
13636
13637 pm_node_t *value = NULL;
13638
13639 if (token_begins_expression_p(parser->current.type)) {
13640 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13641 } else {
13642 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13643 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13644 value = UP(pm_constant_read_node_create(parser, &constant));
13645 } else {
13646 int depth = -1;
13647 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13648
13649 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13650 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13651 } else {
13652 depth = pm_parser_local_depth(parser, &identifier);
13653 }
13654
13655 if (depth == -1) {
13656 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13657 } else {
13658 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13659 }
13660 }
13661
13662 value->location.length++;
13663 value = UP(pm_implicit_node_create(parser, value));
13664 }
13665
13666 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13667 break;
13668 }
13669 default: {
13670 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13671
13672 // Hash keys that are strings are automatically frozen. We will
13673 // mark that here.
13674 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13675 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13676 }
13677
13678 pm_hash_key_static_literals_add(parser, literals, key);
13679
13680 pm_token_t operator = { 0 };
13681 if (!pm_symbol_node_label_p(parser, key)) {
13682 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13683 operator = parser->previous;
13684 }
13685
13686 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13687 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13688 break;
13689 }
13690 }
13691
13692 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13693 pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
13694 } else {
13695 pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
13696 }
13697
13698 // If there's no comma after the element, then we're done.
13699 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13700
13701 // If the next element starts with a label or a **, then we know we have
13702 // another element in the hash, so we'll continue parsing.
13703 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13704
13705 // Otherwise we need to check if the subsequent token begins an expression.
13706 // If it does, then we'll continue parsing.
13707 if (token_begins_expression_p(parser->current.type)) continue;
13708
13709 // Otherwise by default we will exit out of this loop.
13710 break;
13711 }
13712
13713 return contains_keyword_splat;
13714}
13715
13716static PRISM_INLINE bool
13717argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13718 if (pm_symbol_node_label_p(parser, argument)) {
13719 return true;
13720 }
13721
13722 switch (PM_NODE_TYPE(argument)) {
13723 case PM_CALL_NODE: {
13724 pm_call_node_t *cast = (pm_call_node_t *) argument;
13725 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13726 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13727 return false;
13728 }
13729 if (cast->block != NULL) {
13730 return false;
13731 }
13732 }
13733 break;
13734 }
13735 default: break;
13736 }
13737 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13738}
13739
13743static PRISM_INLINE void
13744parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13745 if (arguments->arguments == NULL) {
13746 arguments->arguments = pm_arguments_node_create(parser);
13747 }
13748
13749 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
13750}
13751
13755static void
13756parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
13757 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13758
13759 // First we need to check if the next token is one that could be the start
13760 // of an argument. If it's not, then we can just return.
13761 if (
13762 match2(parser, terminator, PM_TOKEN_EOF) ||
13763 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13764 context_terminator(parser->current_context->context, &parser->current)
13765 ) {
13766 return;
13767 }
13768
13769 bool parsed_first_argument = false;
13770 bool parsed_bare_hash = false;
13771 bool parsed_block_argument = false;
13772 bool parsed_forwarding_arguments = false;
13773
13774 while (!match1(parser, PM_TOKEN_EOF)) {
13775 if (parsed_forwarding_arguments) {
13776 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13777 }
13778
13779 pm_node_t *argument = NULL;
13780
13781 switch (parser->current.type) {
13782 case PM_TOKEN_USTAR_STAR:
13783 case PM_TOKEN_LABEL: {
13784 if (parsed_bare_hash) {
13785 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13786 }
13787
13788 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13789 argument = UP(hash);
13790
13791 pm_static_literals_t hash_keys = { 0 };
13792 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13793
13794 parse_arguments_append(parser, arguments, argument);
13795
13796 pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13797 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13798 pm_node_flag_set(UP(arguments->arguments), node_flags);
13799
13800 pm_static_literals_free(&hash_keys);
13801 parsed_bare_hash = true;
13802
13803 break;
13804 }
13805 case PM_TOKEN_UAMPERSAND: {
13806 parser_lex(parser);
13807 pm_token_t operator = parser->previous;
13808 pm_node_t *expression = NULL;
13809
13810 if (token_begins_expression_p(parser->current.type)) {
13811 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13812 } else {
13813 pm_parser_scope_forwarding_block_check(parser, &operator);
13814 }
13815
13816 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
13817 if (parsed_block_argument) {
13818 parse_arguments_append(parser, arguments, argument);
13819 } else {
13820 arguments->block = argument;
13821 }
13822
13823 if (match1(parser, PM_TOKEN_COMMA)) {
13824 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13825 }
13826
13827 parsed_block_argument = true;
13828 break;
13829 }
13830 case PM_TOKEN_USTAR: {
13831 parser_lex(parser);
13832 pm_token_t operator = parser->previous;
13833
13834 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13835 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13836 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13837 if (parsed_bare_hash) {
13838 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13839 }
13840 } else {
13841 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13842
13843 if (parsed_bare_hash) {
13844 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13845 }
13846
13847 argument = UP(pm_splat_node_create(parser, &operator, expression));
13848 }
13849
13850 parse_arguments_append(parser, arguments, argument);
13851 break;
13852 }
13853 case PM_TOKEN_UDOT_DOT_DOT: {
13854 if (accepts_forwarding) {
13855 parser_lex(parser);
13856
13857 if (token_begins_expression_p(parser->current.type)) {
13858 // If the token begins an expression then this ... was
13859 // not actually argument forwarding but was instead a
13860 // range.
13861 pm_token_t operator = parser->previous;
13862 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13863
13864 // If we parse a range, we need to validate that we
13865 // didn't accidentally violate the nonassoc rules of the
13866 // ... operator.
13867 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13868 pm_range_node_t *range = (pm_range_node_t *) right;
13869 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13870 }
13871
13872 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13873 } else {
13874 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13875 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13876 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13877 }
13878
13879 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13880 parse_arguments_append(parser, arguments, argument);
13881 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13882 arguments->has_forwarding = true;
13883 parsed_forwarding_arguments = true;
13884 break;
13885 }
13886 }
13887 }
13889 default: {
13890 if (argument == NULL) {
13891 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13892 }
13893
13894 bool contains_keywords = false;
13895 bool contains_keyword_splat = false;
13896
13897 if (argument_allowed_for_bare_hash(parser, argument)) {
13898 if (parsed_bare_hash) {
13899 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13900 }
13901
13902 pm_token_t operator = { 0 };
13903 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13904 operator = parser->previous;
13905 }
13906
13907 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13908 contains_keywords = true;
13909
13910 // Create the set of static literals for this hash.
13911 pm_static_literals_t hash_keys = { 0 };
13912 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13913
13914 // Finish parsing the one we are part way through.
13915 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13916 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13917
13918 pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
13919 argument = UP(bare_hash);
13920
13921 // Then parse more if we have a comma
13922 if (accept1(parser, PM_TOKEN_COMMA) && (
13923 token_begins_expression_p(parser->current.type) ||
13924 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13925 )) {
13926 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13927 }
13928
13929 pm_static_literals_free(&hash_keys);
13930 parsed_bare_hash = true;
13931 }
13932
13933 parse_arguments_append(parser, arguments, argument);
13934
13935 pm_node_flags_t node_flags = 0;
13936 if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13937 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13938 pm_node_flag_set(UP(arguments->arguments), node_flags);
13939
13940 break;
13941 }
13942 }
13943
13944 parsed_first_argument = true;
13945
13946 // If parsing the argument failed, we need to stop parsing arguments.
13947 if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
13948
13949 // If the terminator of these arguments is not EOF, then we have a
13950 // specific token we're looking for. In that case we can accept a
13951 // newline here because it is not functioning as a statement terminator.
13952 bool accepted_newline = false;
13953 if (terminator != PM_TOKEN_EOF) {
13954 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13955 }
13956
13957 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13958 // If we previously were on a comma and we just parsed a bare hash,
13959 // then we want to continue parsing arguments. This is because the
13960 // comma was grabbed up by the hash parser.
13961 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13962 // If there was a comma, then we need to check if we also accepted a
13963 // newline. If we did, then this is a syntax error.
13964 if (accepted_newline) {
13965 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13966 }
13967
13968 // If this is a command call and an argument takes a block,
13969 // there can be no further arguments. For example,
13970 // `foo(bar 1 do end, 2)` should be rejected.
13971 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13972 pm_call_node_t *call = (pm_call_node_t *) argument;
13973 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13974 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13975 break;
13976 }
13977 }
13978 } else {
13979 // If there is no comma at the end of the argument list then we're
13980 // done parsing arguments and can break out of this loop.
13981 break;
13982 }
13983
13984 // If we hit the terminator, then that means we have a trailing comma so
13985 // we can accept that output as well.
13986 if (match1(parser, terminator)) break;
13987 }
13988}
13989
14001parse_required_destructured_parameter(pm_parser_t *parser) {
14002 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14003
14004 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14005 pm_multi_target_node_opening_set(parser, node, &parser->previous);
14006
14007 do {
14008 pm_node_t *param;
14009
14010 // If we get here then we have a trailing comma, which isn't allowed in
14011 // the grammar. In other places, multi targets _do_ allow trailing
14012 // commas, so here we'll assume this is a mistake of the user not
14013 // knowing it's not allowed here.
14014 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14015 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14016 pm_multi_target_node_targets_append(parser, node, param);
14017 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14018 break;
14019 }
14020
14021 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14022 param = UP(parse_required_destructured_parameter(parser));
14023 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14024 pm_token_t star = parser->previous;
14025 pm_node_t *value = NULL;
14026
14027 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14028 pm_token_t name = parser->previous;
14029 value = UP(pm_required_parameter_node_create(parser, &name));
14030 if (pm_parser_parameter_name_check(parser, &name)) {
14031 pm_node_flag_set_repeated_parameter(value);
14032 }
14033 pm_parser_local_add_token(parser, &name, 1);
14034 }
14035
14036 param = UP(pm_splat_node_create(parser, &star, value));
14037 } else {
14038 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14039 pm_token_t name = parser->previous;
14040
14041 param = UP(pm_required_parameter_node_create(parser, &name));
14042 if (pm_parser_parameter_name_check(parser, &name)) {
14043 pm_node_flag_set_repeated_parameter(param);
14044 }
14045 pm_parser_local_add_token(parser, &name, 1);
14046 }
14047
14048 pm_multi_target_node_targets_append(parser, node, param);
14049 } while (accept1(parser, PM_TOKEN_COMMA));
14050
14051 accept1(parser, PM_TOKEN_NEWLINE);
14052 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14053 pm_multi_target_node_closing_set(parser, node, &parser->previous);
14054
14055 return node;
14056}
14057
/**
 * The position reached so far while parsing a parameter list.
 *
 * The numeric values are significant: update_parameter_state compares them
 * relationally. Parsing starts at PM_PARAMETERS_ORDER_NONE and the current
 * value only moves toward smaller values; a token whose state is greater than
 * the current one is reported as an ordering error.
 */
typedef enum {
    PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14073
14077static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14078 [0] = PM_PARAMETERS_NO_CHANGE,
14079 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14080 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14081 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14082 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14083 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14084 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14085 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14086 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14087 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14088 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14089 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14090};
14091
14099static bool
14100update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14101 pm_parameters_order_t state = parameters_ordering[token->type];
14102 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14103
14104 // If we see another ordered argument after a optional argument
14105 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14106 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14107 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14108 return true;
14109 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14110 return true;
14111 }
14112
14113 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14114 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14115 return false;
14116 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14117 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14118 return false;
14119 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14120 // We know what transition we failed on, so we can provide a better error here.
14121 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14122 return false;
14123 }
14124
14125 if (state < *current) *current = state;
14126 return true;
14127}
14128
14129static PRISM_INLINE void
14130parse_parameters_handle_trailing_comma(
14131 pm_parser_t *parser,
14132 pm_parameters_node_t *params,
14133 pm_parameters_order_t order,
14134 bool in_block,
14135 bool allows_trailing_comma
14136) {
14137 if (!allows_trailing_comma) {
14138 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14139 return;
14140 }
14141
14142 if (in_block) {
14143 if (order >= PM_PARAMETERS_ORDER_NAMED) {
14144 // foo do |bar,|; end
14145 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14146
14147 if (params->rest == NULL) {
14148 pm_parameters_node_rest_set(params, param);
14149 } else {
14150 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14151 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14152 }
14153 } else {
14154 // foo do |*bar,|; end
14155 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14156 }
14157 } else {
14158 // https://bugs.ruby-lang.org/issues/19107
14159 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
14160 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14161 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14162 }
14163 }
14164}
14165
14169static pm_parameters_node_t *
14170parse_parameters(
14171 pm_parser_t *parser,
14172 pm_binding_power_t binding_power,
14173 bool uses_parentheses,
14174 bool allows_trailing_comma,
14175 bool allows_forwarding_parameters,
14176 bool accepts_blocks_in_defaults,
14177 bool in_block,
14178 pm_diagnostic_id_t diag_id_forwarding,
14179 uint16_t depth
14180) {
14181 pm_do_loop_stack_push(parser, false);
14182
14183 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14184 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14185
14186 while (true) {
14187 bool parsing = true;
14188
14189 switch (parser->current.type) {
14190 case PM_TOKEN_PARENTHESIS_LEFT: {
14191 update_parameter_state(parser, &parser->current, &order);
14192 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
14193
14194 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14195 pm_parameters_node_requireds_append(parser->arena, params, param);
14196 } else {
14197 pm_parameters_node_posts_append(parser->arena, params, param);
14198 }
14199 break;
14200 }
14201 case PM_TOKEN_UAMPERSAND:
14202 case PM_TOKEN_AMPERSAND: {
14203 update_parameter_state(parser, &parser->current, &order);
14204 parser_lex(parser);
14205
14206 pm_token_t operator = parser->previous;
14207 pm_node_t *param;
14208
14209 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14210 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
14211 } else {
14212 pm_token_t name = {0};
14213
14214 bool repeated = false;
14215 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14216 name = parser->previous;
14217 repeated = pm_parser_parameter_name_check(parser, &name);
14218 pm_parser_local_add_token(parser, &name, 1);
14219 } else {
14220 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14221 }
14222
14223 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
14224 if (repeated) {
14225 pm_node_flag_set_repeated_parameter(param);
14226 }
14227 }
14228
14229 if (params->block == NULL) {
14230 pm_parameters_node_block_set(params, param);
14231 } else {
14232 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
14233 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14234 }
14235
14236 break;
14237 }
14238 case PM_TOKEN_UDOT_DOT_DOT: {
14239 if (!allows_forwarding_parameters) {
14240 pm_parser_err_current(parser, diag_id_forwarding);
14241 }
14242
14243 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14244 parser_lex(parser);
14245
14246 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14247 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14248
14249 if (params->keyword_rest != NULL) {
14250 // If we already have a keyword rest parameter, then we replace it with the
14251 // forwarding parameter and move the keyword rest parameter to the posts list.
14252 pm_node_t *keyword_rest = params->keyword_rest;
14253 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
14254 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14255 params->keyword_rest = NULL;
14256 }
14257
14258 pm_parameters_node_keyword_rest_set(params, UP(param));
14259 break;
14260 }
14261 case PM_TOKEN_CLASS_VARIABLE:
14262 case PM_TOKEN_IDENTIFIER:
14263 case PM_TOKEN_CONSTANT:
14264 case PM_TOKEN_INSTANCE_VARIABLE:
14265 case PM_TOKEN_GLOBAL_VARIABLE:
14266 case PM_TOKEN_METHOD_NAME: {
14267 parser_lex(parser);
14268 switch (parser->previous.type) {
14269 case PM_TOKEN_CONSTANT:
14270 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14271 break;
14272 case PM_TOKEN_INSTANCE_VARIABLE:
14273 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14274 break;
14275 case PM_TOKEN_GLOBAL_VARIABLE:
14276 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14277 break;
14278 case PM_TOKEN_CLASS_VARIABLE:
14279 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14280 break;
14281 case PM_TOKEN_METHOD_NAME:
14282 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14283 break;
14284 default: break;
14285 }
14286
14287 if (parser->current.type == PM_TOKEN_EQUAL) {
14288 update_parameter_state(parser, &parser->current, &order);
14289 } else {
14290 update_parameter_state(parser, &parser->previous, &order);
14291 }
14292
14293 pm_token_t name = parser->previous;
14294 bool repeated = pm_parser_parameter_name_check(parser, &name);
14295 pm_parser_local_add_token(parser, &name, 1);
14296
14297 if (match1(parser, PM_TOKEN_EQUAL)) {
14298 pm_token_t operator = parser->current;
14299 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14300 parser_lex(parser);
14301
14302 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14303 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14304
14305 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14306 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14307 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14308
14309 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14310
14311 if (repeated) {
14312 pm_node_flag_set_repeated_parameter(UP(param));
14313 }
14314 pm_parameters_node_optionals_append(parser->arena, params, param);
14315
14316 // If the value of the parameter increased the number of
14317 // reads of that parameter, then we need to warn that we
14318 // have a circular definition.
14319 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14320 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14321 }
14322
14323 context_pop(parser);
14324
14325 // If parsing the value of the parameter resulted in error recovery,
14326 // then we can put a missing node in its place and stop parsing the
14327 // parameters entirely now.
14328 if (parser->recovering) {
14329 parsing = false;
14330 break;
14331 }
14332 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14333 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14334 if (repeated) {
14335 pm_node_flag_set_repeated_parameter(UP(param));
14336 }
14337 pm_parameters_node_requireds_append(parser->arena, params, UP(param));
14338 } else {
14339 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14340 if (repeated) {
14341 pm_node_flag_set_repeated_parameter(UP(param));
14342 }
14343 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14344 }
14345
14346 break;
14347 }
14348 case PM_TOKEN_LABEL: {
14349 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14350 update_parameter_state(parser, &parser->current, &order);
14351
14352 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14353 parser_lex(parser);
14354
14355 pm_token_t name = parser->previous;
14356 pm_token_t local = name;
14357 local.end -= 1;
14358
14359 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14360 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14361 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14362 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14363 }
14364
14365 bool repeated = pm_parser_parameter_name_check(parser, &local);
14366 pm_parser_local_add_token(parser, &local, 1);
14367
14368 switch (parser->current.type) {
14369 case PM_TOKEN_COMMA:
14370 case PM_TOKEN_PARENTHESIS_RIGHT:
14371 case PM_TOKEN_PIPE: {
14372 context_pop(parser);
14373
14374 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14375 if (repeated) {
14376 pm_node_flag_set_repeated_parameter(param);
14377 }
14378
14379 pm_parameters_node_keywords_append(parser->arena, params, param);
14380 break;
14381 }
14382 case PM_TOKEN_SEMICOLON:
14383 case PM_TOKEN_NEWLINE: {
14384 context_pop(parser);
14385
14386 if (uses_parentheses) {
14387 parsing = false;
14388 break;
14389 }
14390
14391 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14392 if (repeated) {
14393 pm_node_flag_set_repeated_parameter(param);
14394 }
14395
14396 pm_parameters_node_keywords_append(parser->arena, params, param);
14397 break;
14398 }
14399 default: {
14400 pm_node_t *param;
14401
14402 if (token_begins_expression_p(parser->current.type)) {
14403 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14404 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14405
14406 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14407 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14408 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14409
14410 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14411 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14412 }
14413
14414 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14415 }
14416 else {
14417 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14418 }
14419
14420 if (repeated) {
14421 pm_node_flag_set_repeated_parameter(param);
14422 }
14423
14424 context_pop(parser);
14425 pm_parameters_node_keywords_append(parser->arena, params, param);
14426
14427 // If parsing the value of the parameter resulted in error recovery,
14428 // then we can put a missing node in its place and stop parsing the
14429 // parameters entirely now.
14430 if (parser->recovering) {
14431 parsing = false;
14432 break;
14433 }
14434 }
14435 }
14436
14437 parser->in_keyword_arg = false;
14438 break;
14439 }
14440 case PM_TOKEN_USTAR:
14441 case PM_TOKEN_STAR: {
14442 update_parameter_state(parser, &parser->current, &order);
14443 parser_lex(parser);
14444
14445 pm_token_t operator = parser->previous;
14446 pm_token_t name = { 0 };
14447 bool repeated = false;
14448
14449 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14450 name = parser->previous;
14451 repeated = pm_parser_parameter_name_check(parser, &name);
14452 pm_parser_local_add_token(parser, &name, 1);
14453 } else {
14454 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14455 }
14456
14457 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14458 if (repeated) {
14459 pm_node_flag_set_repeated_parameter(param);
14460 }
14461
14462 if (params->rest == NULL) {
14463 pm_parameters_node_rest_set(params, param);
14464 } else {
14465 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14466 pm_parameters_node_posts_append(parser->arena, params, param);
14467 }
14468
14469 break;
14470 }
14471 case PM_TOKEN_STAR_STAR:
14472 case PM_TOKEN_USTAR_STAR: {
14473 pm_parameters_order_t previous_order = order;
14474 update_parameter_state(parser, &parser->current, &order);
14475 parser_lex(parser);
14476
14477 pm_token_t operator = parser->previous;
14478 pm_node_t *param;
14479
14480 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14481 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14482 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14483 }
14484
14485 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14486 } else {
14487 pm_token_t name = { 0 };
14488
14489 bool repeated = false;
14490 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14491 name = parser->previous;
14492 repeated = pm_parser_parameter_name_check(parser, &name);
14493 pm_parser_local_add_token(parser, &name, 1);
14494 } else {
14495 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14496 }
14497
14498 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14499 if (repeated) {
14500 pm_node_flag_set_repeated_parameter(param);
14501 }
14502 }
14503
14504 if (params->keyword_rest == NULL) {
14505 pm_parameters_node_keyword_rest_set(params, param);
14506 } else {
14507 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14508 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14509 }
14510
14511 break;
14512 }
14513 default:
14514 if (parser->previous.type == PM_TOKEN_COMMA) {
14515 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14516 }
14517
14518 parsing = false;
14519 break;
14520 }
14521
14522 // If we hit some kind of issue while parsing the parameter, this would
14523 // have been set to false. In that case, we need to break out of the
14524 // loop.
14525 if (!parsing) break;
14526
14527 bool accepted_newline = false;
14528 if (uses_parentheses) {
14529 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14530 }
14531
14532 if (accept1(parser, PM_TOKEN_COMMA)) {
14533 // If there was a comma, but we also accepted a newline, then this
14534 // is a syntax error.
14535 if (accepted_newline) {
14536 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14537 }
14538 } else {
14539 // If there was no comma, then we're done parsing parameters.
14540 break;
14541 }
14542 }
14543
14544 pm_do_loop_stack_pop(parser);
14545
14546 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14547 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14548 return NULL;
14549 }
14550
14551 return params;
14552}
14553
14558static size_t
14559token_newline_index(const pm_parser_t *parser) {
14560 if (parser->heredoc_end == NULL) {
14561 // This is the common case. In this case we can look at the previously
14562 // recorded newline in the newline list and subtract from the current
14563 // offset.
14564 return parser->line_offsets.size - 1;
14565 } else {
14566 // This is unlikely. This is the case that we have already parsed the
14567 // start of a heredoc, so we cannot rely on looking at the previous
14568 // offset of the newline list, and instead must go through the whole
14569 // process of a binary search for the line number.
14570 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14571 }
14572}
14573
14578static int64_t
14579token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14580 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14581 const uint8_t *end = token->start;
14582
14583 // Skip over the BOM if it is present.
14584 if (
14585 newline_index == 0 &&
14586 parser->start[0] == 0xef &&
14587 parser->start[1] == 0xbb &&
14588 parser->start[2] == 0xbf
14589 ) cursor += 3;
14590
14591 int64_t column = 0;
14592 for (; cursor < end; cursor++) {
14593 switch (*cursor) {
14594 case '\t':
14595 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14596 break;
14597 case ' ':
14598 column++;
14599 break;
14600 default:
14601 column++;
14602 if (break_on_non_space) return -1;
14603 break;
14604 }
14605 }
14606
14607 return column;
14608}
14609
14614static void
14615parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14616 // If these warnings are disabled (unlikely), then we can just return.
14617 if (!parser->warn_mismatched_indentation) return;
14618
14619 // If the tokens are on the same line, we do not warn.
14620 size_t closing_newline_index = token_newline_index(parser);
14621 if (opening_newline_index == closing_newline_index) return;
14622
14623 // If the opening token has anything other than spaces or tabs before it,
14624 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14625 // and the `if` immediately follows an `else` keyword.
14626 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14627 if (!if_after_else && (opening_column == -1)) return;
14628
14629 // Get a reference to the closing token off the current parser. This assumes
14630 // that the caller has placed this in the correct position.
14631 pm_token_t *closing_token = &parser->current;
14632
14633 // If the tokens are at the same indentation, we do not warn.
14634 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14635 if ((closing_column == -1) || (opening_column == closing_column)) return;
14636
14637 // If the closing column is greater than the opening column and we are
14638 // allowing indentation, then we do not warn.
14639 if (allow_indent && (closing_column > opening_column)) return;
14640
14641 // Otherwise, add a warning.
14642 PM_PARSER_WARN_FORMAT(
14643 parser,
14644 PM_TOKEN_START(parser, closing_token),
14645 PM_TOKEN_LENGTH(closing_token),
14646 PM_WARN_INDENTATION_MISMATCH,
14647 (int) (closing_token->end - closing_token->start),
14648 (const char *) closing_token->start,
14649 (int) (opening_token->end - opening_token->start),
14650 (const char *) opening_token->start,
14651 ((int32_t) opening_newline_index) + parser->start_line
14652 );
14653}
14654
/**
 * The kind of construct whose rescue clauses are being parsed; passed to
 * parse_rescues as its `type` argument. Values start at 1.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK,
    PM_RESCUES_CLASS,
    PM_RESCUES_DEF,
    PM_RESCUES_LAMBDA,
    PM_RESCUES_MODULE,
    PM_RESCUES_SCLASS
} pm_rescues_type_t;
14664
14669static PRISM_INLINE void
14670parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14671 pm_rescue_node_t *current = NULL;
14672
14673 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14674 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14675 parser_lex(parser);
14676
14677 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14678
14679 switch (parser->current.type) {
14680 case PM_TOKEN_EQUAL_GREATER: {
14681 // Here we have an immediate => after the rescue keyword, in which case
14682 // we're going to have an empty list of exceptions to rescue (which
14683 // implies StandardError).
14684 parser_lex(parser);
14685 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14686
14687 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14688 reference = parse_target(parser, reference, false, false);
14689
14690 pm_rescue_node_reference_set(rescue, reference);
14691 break;
14692 }
14693 case PM_TOKEN_NEWLINE:
14694 case PM_TOKEN_SEMICOLON:
14695 case PM_TOKEN_KEYWORD_THEN:
14696 // Here we have a terminator for the rescue keyword, in which
14697 // case we're going to just continue on.
14698 break;
14699 default: {
14700 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14701 // Here we have something that could be an exception expression, so
14702 // we'll attempt to parse it here and any others delimited by commas.
14703
14704 do {
14705 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14706 pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
14707
14708 // If we hit a newline, then this is the end of the rescue expression. We
14709 // can continue on to parse the statements.
14710 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14711
14712 // If we hit a `=>` then we're going to parse the exception variable. Once
14713 // we've done that, we'll break out of the loop and parse the statements.
14714 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14715 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14716
14717 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14718 reference = parse_target(parser, reference, false, false);
14719
14720 pm_rescue_node_reference_set(rescue, reference);
14721 break;
14722 }
14723 } while (accept1(parser, PM_TOKEN_COMMA));
14724 }
14725 }
14726 }
14727
14728 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14729 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14730 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14731 }
14732 } else {
14733 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14734 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14735 }
14736
14737 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14738 pm_accepts_block_stack_push(parser, true);
14739 pm_context_t context;
14740
14741 switch (type) {
14742 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14743 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14744 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14745 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14746 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14747 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14748 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14749 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14750 }
14751
14752 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14753 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14754
14755 pm_accepts_block_stack_pop(parser);
14756 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14757 }
14758
14759 if (current == NULL) {
14760 pm_begin_node_rescue_clause_set(parent_node, rescue);
14761 } else {
14762 pm_rescue_node_subsequent_set(current, rescue);
14763 }
14764
14765 current = rescue;
14766 }
14767
14768 // The end node locations on rescue nodes will not be set correctly
14769 // since we won't know the end until we've found all subsequent
14770 // clauses. This sets the end location on all rescues once we know it.
14771 if (current != NULL) {
14772 pm_rescue_node_t *clause = parent_node->rescue_clause;
14773
14774 while (clause != NULL) {
14775 PM_NODE_LENGTH_SET_NODE(clause, current);
14776 clause = clause->subsequent;
14777 }
14778 }
14779
14780 pm_token_t else_keyword;
14781 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14782 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14783 opening_newline_index = token_newline_index(parser);
14784
14785 else_keyword = parser->current;
14786 opening = &else_keyword;
14787
14788 parser_lex(parser);
14789 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14790
14791 pm_statements_node_t *else_statements = NULL;
14792 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14793 pm_accepts_block_stack_push(parser, true);
14794 pm_context_t context;
14795
14796 switch (type) {
14797 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14798 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14799 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14800 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14801 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14802 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14803 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14804 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14805 }
14806
14807 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14808 pm_accepts_block_stack_pop(parser);
14809
14810 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14811 }
14812
14813 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14814 pm_begin_node_else_clause_set(parent_node, else_clause);
14815
14816 // If we don't have a `current` rescue node, then this is a dangling
14817 // else, and it's an error.
14818 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14819 }
14820
14821 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14822 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14823 pm_token_t ensure_keyword = parser->current;
14824
14825 parser_lex(parser);
14826 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14827
14828 pm_statements_node_t *ensure_statements = NULL;
14829 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14830 pm_accepts_block_stack_push(parser, true);
14831 pm_context_t context;
14832
14833 switch (type) {
14834 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14835 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14836 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14837 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14838 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14839 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14840 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14841 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14842 }
14843
14844 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14845 pm_accepts_block_stack_pop(parser);
14846
14847 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14848 }
14849
14850 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14851 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14852 }
14853
14854 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14855 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14856 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14857 } else {
14858 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14859 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14860 }
14861}
14862
14867static pm_begin_node_t *
14868parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14869 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14870 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14871
14872 node->base.location.start = U32(start - parser->start);
14873 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14874
14875 return node;
14876}
14877
14882parse_block_parameters(
14883 pm_parser_t *parser,
14884 bool allows_trailing_comma,
14885 const pm_token_t *opening,
14886 bool is_lambda_literal,
14887 bool accepts_blocks_in_defaults,
14888 uint16_t depth
14889) {
14890 pm_parameters_node_t *parameters = NULL;
14891 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14892 if (!is_lambda_literal) {
14893 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14894 }
14895 parameters = parse_parameters(
14896 parser,
14897 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14898 false,
14899 allows_trailing_comma,
14900 false,
14901 accepts_blocks_in_defaults,
14902 true,
14903 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14904 (uint16_t) (depth + 1)
14905 );
14906 if (!is_lambda_literal) {
14907 context_pop(parser);
14908 }
14909 }
14910
14911 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14912 if (opening != NULL) {
14913 accept1(parser, PM_TOKEN_NEWLINE);
14914
14915 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14916 do {
14917 switch (parser->current.type) {
14918 case PM_TOKEN_CONSTANT:
14919 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14920 parser_lex(parser);
14921 break;
14922 case PM_TOKEN_INSTANCE_VARIABLE:
14923 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14924 parser_lex(parser);
14925 break;
14926 case PM_TOKEN_GLOBAL_VARIABLE:
14927 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14928 parser_lex(parser);
14929 break;
14930 case PM_TOKEN_CLASS_VARIABLE:
14931 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14932 parser_lex(parser);
14933 break;
14934 default:
14935 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14936 break;
14937 }
14938
14939 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14940 pm_parser_local_add_token(parser, &parser->previous, 1);
14941
14942 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14943 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14944
14945 pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
14946 } while (accept1(parser, PM_TOKEN_COMMA));
14947 }
14948 }
14949
14950 return block_parameters;
14951}
14952
14957static bool
14958outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14959 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14960 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14961 }
14962
14963 return false;
14964}
14965
/**
 * The source spellings of the numbered parameters `_1` through `_9`, stored so
 * that the entry at index N holds the name of parameter N + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    [0] = "_1",
    [1] = "_2",
    [2] = "_3",
    [3] = "_4",
    [4] = "_5",
    [5] = "_6",
    [6] = "_7",
    [7] = "_8",
    [8] = "_9"
};
14974
14980static pm_node_t *
14981parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14982 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14983
14984 // If we have ordinary parameters, then we will return them as the set of
14985 // parameters.
14986 if (parameters != NULL) {
14987 // If we also have implicit parameters, then this is an error.
14988 if (implicit_parameters->size > 0) {
14989 pm_node_t *node = implicit_parameters->nodes[0];
14990
14991 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14992 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14993 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14994 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14995 } else {
14996 assert(false && "unreachable");
14997 }
14998 }
14999
15000 return parameters;
15001 }
15002
15003 // If we don't have any implicit parameters, then the set of parameters is
15004 // NULL.
15005 if (implicit_parameters->size == 0) {
15006 return NULL;
15007 }
15008
15009 // If we don't have ordinary parameters, then we now must validate our set
15010 // of implicit parameters. We can only have numbered parameters or it, but
15011 // they cannot be mixed.
15012 uint8_t numbered_parameter = 0;
15013 bool it_parameter = false;
15014
15015 for (size_t index = 0; index < implicit_parameters->size; index++) {
15016 pm_node_t *node = implicit_parameters->nodes[index];
15017
15018 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15019 if (it_parameter) {
15020 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15021 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15022 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15023 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15024 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15025 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
15026 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
15027 } else {
15028 assert(false && "unreachable");
15029 }
15030 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15031 if (numbered_parameter > 0) {
15032 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15033 } else {
15034 it_parameter = true;
15035 }
15036 }
15037 }
15038
15039 if (numbered_parameter > 0) {
15040 // Go through the parent scopes and mark them as being disallowed from
15041 // using numbered parameters because this inner scope is using them.
15042 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15043 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15044 }
15045 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
15046 }
15047
15048 if (it_parameter) {
15049 return UP(pm_it_parameters_node_create(parser, opening, closing));
15050 }
15051
15052 return NULL;
15053}
15054
15058static pm_block_node_t *
15059parse_block(pm_parser_t *parser, uint16_t depth) {
15060 pm_token_t opening = parser->previous;
15061 accept1(parser, PM_TOKEN_NEWLINE);
15062
15063 pm_accepts_block_stack_push(parser, true);
15064 pm_parser_scope_push(parser, false);
15065
15066 pm_block_parameters_node_t *block_parameters = NULL;
15067
15068 if (accept1(parser, PM_TOKEN_PIPE)) {
15069 pm_token_t block_parameters_opening = parser->previous;
15070 if (match1(parser, PM_TOKEN_PIPE)) {
15071 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15072 parser->command_start = true;
15073 parser_lex(parser);
15074 } else {
15075 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15076 accept1(parser, PM_TOKEN_NEWLINE);
15077 parser->command_start = true;
15078 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15079 }
15080
15081 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
15082 }
15083
15084 accept1(parser, PM_TOKEN_NEWLINE);
15085 pm_node_t *statements = NULL;
15086
15087 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15088 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15089 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
15090 }
15091
15092 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
15093 } else {
15094 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15095 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15096 pm_accepts_block_stack_push(parser, true);
15097 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
15098 pm_accepts_block_stack_pop(parser);
15099 }
15100
15101 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15102 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15103 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
15104 }
15105 }
15106
15107 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
15108 }
15109
15110 pm_constant_id_list_t locals;
15111 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15112 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
15113
15114 pm_parser_scope_pop(parser);
15115 pm_accepts_block_stack_pop(parser);
15116
15117 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15118}
15119
15125static bool
15126parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
15127 /* Fast path: if the current token can't begin an expression and isn't
15128 * a parenthesis, block opener, or splat/block-pass operator, there are
15129 * no arguments to parse. */
15130 if (
15131 !token_begins_expression_p(parser->current.type) &&
15132 !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
15133 ) {
15134 return false;
15135 }
15136
15137 bool found = false;
15138 bool parsed_command_args = false;
15139
15140 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15141 found |= true;
15142 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
15143
15144 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15145 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15146 } else {
15147 pm_accepts_block_stack_push(parser, true);
15148 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
15149
15150 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15151 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
15152 parser->previous.start = parser->previous.end;
15153 parser->previous.type = 0;
15154 }
15155
15156 pm_accepts_block_stack_pop(parser);
15157 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15158 }
15159 } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15160 found |= true;
15161 parsed_command_args = true;
15162 pm_accepts_block_stack_push(parser, false);
15163
15164 // If we get here, then the subsequent token cannot be used as an infix
15165 // operator. In this case we assume the subsequent token is part of an
15166 // argument to this method call.
15167 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
15168
15169 // If we have done with the arguments and still not consumed the comma,
15170 // then we have a trailing comma where we need to check whether it is
15171 // allowed or not.
15172 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15173 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
15174 }
15175
15176 pm_accepts_block_stack_pop(parser);
15177 }
15178
15179 // If we're at the end of the arguments, we can now check if there is a block
15180 // node that starts with a {. If there is, then we can parse it and add it to
15181 // the arguments.
15182 if (accepts_block) {
15183 pm_block_node_t *block = NULL;
15184
15185 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15186 found |= true;
15187 block = parse_block(parser, (uint16_t) (depth + 1));
15188 pm_arguments_validate_block(parser, arguments, block);
15189 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15190 found |= true;
15191 block = parse_block(parser, (uint16_t) (depth + 1));
15192 } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
15193 found |= true;
15194 block = parse_block(parser, (uint16_t) (depth + 1));
15195 }
15196
15197 if (block != NULL) {
15198 if (arguments->block == NULL && !arguments->has_forwarding) {
15199 arguments->block = UP(block);
15200 } else {
15201 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
15202
15203 if (arguments->block != NULL) {
15204 if (arguments->arguments == NULL) {
15205 arguments->arguments = pm_arguments_node_create(parser);
15206 }
15207 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
15208 }
15209 arguments->block = UP(block);
15210 }
15211 }
15212 }
15213
15214 return found;
15215}
15216
15221static void
15222parse_return(pm_parser_t *parser, pm_node_t *node) {
15223 bool in_sclass = false;
15224 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15225 switch (context_node->context) {
15226 case PM_CONTEXT_BEGIN_ELSE:
15227 case PM_CONTEXT_BEGIN_ENSURE:
15228 case PM_CONTEXT_BEGIN_RESCUE:
15229 case PM_CONTEXT_BEGIN:
15230 case PM_CONTEXT_CASE_IN:
15231 case PM_CONTEXT_CASE_WHEN:
15232 case PM_CONTEXT_DEFAULT_PARAMS:
15233 case PM_CONTEXT_DEFINED:
15234 case PM_CONTEXT_ELSE:
15235 case PM_CONTEXT_ELSIF:
15236 case PM_CONTEXT_EMBEXPR:
15237 case PM_CONTEXT_FOR_INDEX:
15238 case PM_CONTEXT_FOR:
15239 case PM_CONTEXT_IF:
15240 case PM_CONTEXT_LOOP_PREDICATE:
15241 case PM_CONTEXT_MAIN:
15242 case PM_CONTEXT_MULTI_TARGET:
15243 case PM_CONTEXT_PARENS:
15244 case PM_CONTEXT_POSTEXE:
15245 case PM_CONTEXT_PREDICATE:
15246 case PM_CONTEXT_PREEXE:
15247 case PM_CONTEXT_RESCUE_MODIFIER:
15248 case PM_CONTEXT_TERNARY:
15249 case PM_CONTEXT_UNLESS:
15250 case PM_CONTEXT_UNTIL:
15251 case PM_CONTEXT_WHILE:
15252 // Keep iterating up the lists of contexts, because returns can
15253 // see through these.
15254 continue;
15255 case PM_CONTEXT_SCLASS_ELSE:
15256 case PM_CONTEXT_SCLASS_ENSURE:
15257 case PM_CONTEXT_SCLASS_RESCUE:
15258 case PM_CONTEXT_SCLASS:
15259 in_sclass = true;
15260 continue;
15261 case PM_CONTEXT_CLASS_ELSE:
15262 case PM_CONTEXT_CLASS_ENSURE:
15263 case PM_CONTEXT_CLASS_RESCUE:
15264 case PM_CONTEXT_CLASS:
15265 case PM_CONTEXT_MODULE_ELSE:
15266 case PM_CONTEXT_MODULE_ENSURE:
15267 case PM_CONTEXT_MODULE_RESCUE:
15268 case PM_CONTEXT_MODULE:
15269 // These contexts are invalid for a return.
15270 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15271 return;
15272 case PM_CONTEXT_BLOCK_BRACES:
15273 case PM_CONTEXT_BLOCK_ELSE:
15274 case PM_CONTEXT_BLOCK_ENSURE:
15275 case PM_CONTEXT_BLOCK_KEYWORDS:
15276 case PM_CONTEXT_BLOCK_RESCUE:
15277 case PM_CONTEXT_BLOCK_PARAMETERS:
15278 case PM_CONTEXT_DEF_ELSE:
15279 case PM_CONTEXT_DEF_ENSURE:
15280 case PM_CONTEXT_DEF_PARAMS:
15281 case PM_CONTEXT_DEF_RESCUE:
15282 case PM_CONTEXT_DEF:
15283 case PM_CONTEXT_LAMBDA_BRACES:
15284 case PM_CONTEXT_LAMBDA_DO_END:
15285 case PM_CONTEXT_LAMBDA_ELSE:
15286 case PM_CONTEXT_LAMBDA_ENSURE:
15287 case PM_CONTEXT_LAMBDA_RESCUE:
15288 // These contexts are valid for a return, and we should not
15289 // continue to loop.
15290 return;
15291 case PM_CONTEXT_NONE:
15292 // This case should never happen.
15293 assert(false && "unreachable");
15294 break;
15295 }
15296 }
15297 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
15298 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15299 }
15300}
15301
15306static void
15307parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15308 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15309 switch (context_node->context) {
15310 case PM_CONTEXT_BLOCK_BRACES:
15311 case PM_CONTEXT_BLOCK_KEYWORDS:
15312 case PM_CONTEXT_BLOCK_ELSE:
15313 case PM_CONTEXT_BLOCK_ENSURE:
15314 case PM_CONTEXT_BLOCK_PARAMETERS:
15315 case PM_CONTEXT_BLOCK_RESCUE:
15316 case PM_CONTEXT_DEFINED:
15317 case PM_CONTEXT_FOR:
15318 case PM_CONTEXT_LAMBDA_BRACES:
15319 case PM_CONTEXT_LAMBDA_DO_END:
15320 case PM_CONTEXT_LAMBDA_ELSE:
15321 case PM_CONTEXT_LAMBDA_ENSURE:
15322 case PM_CONTEXT_LAMBDA_RESCUE:
15323 case PM_CONTEXT_LOOP_PREDICATE:
15324 case PM_CONTEXT_POSTEXE:
15325 case PM_CONTEXT_UNTIL:
15326 case PM_CONTEXT_WHILE:
15327 // These are the good cases. We're allowed to have a block exit
15328 // in these contexts.
15329 return;
15330 case PM_CONTEXT_DEF:
15331 case PM_CONTEXT_DEF_PARAMS:
15332 case PM_CONTEXT_DEF_ELSE:
15333 case PM_CONTEXT_DEF_ENSURE:
15334 case PM_CONTEXT_DEF_RESCUE:
15335 case PM_CONTEXT_MAIN:
15336 case PM_CONTEXT_PREEXE:
15337 case PM_CONTEXT_SCLASS:
15338 case PM_CONTEXT_SCLASS_ELSE:
15339 case PM_CONTEXT_SCLASS_ENSURE:
15340 case PM_CONTEXT_SCLASS_RESCUE:
15341 // These are the bad cases. We're not allowed to have a block
15342 // exit in these contexts.
15343 //
15344 // If we get here, then we're about to mark this block exit
15345 // as invalid. However, it could later _become_ valid if we
15346 // find a trailing while/until on the expression. In this
15347 // case instead of adding the error here, we'll add the
15348 // block exit to the list of exits for the expression, and
15349 // the node parsing will handle validating it instead.
15350 assert(parser->current_block_exits != NULL);
15351 pm_node_list_append(parser->arena, parser->current_block_exits, node);
15352 return;
15353 case PM_CONTEXT_BEGIN_ELSE:
15354 case PM_CONTEXT_BEGIN_ENSURE:
15355 case PM_CONTEXT_BEGIN_RESCUE:
15356 case PM_CONTEXT_BEGIN:
15357 case PM_CONTEXT_CASE_IN:
15358 case PM_CONTEXT_CASE_WHEN:
15359 case PM_CONTEXT_CLASS_ELSE:
15360 case PM_CONTEXT_CLASS_ENSURE:
15361 case PM_CONTEXT_CLASS_RESCUE:
15362 case PM_CONTEXT_CLASS:
15363 case PM_CONTEXT_DEFAULT_PARAMS:
15364 case PM_CONTEXT_ELSE:
15365 case PM_CONTEXT_ELSIF:
15366 case PM_CONTEXT_EMBEXPR:
15367 case PM_CONTEXT_FOR_INDEX:
15368 case PM_CONTEXT_IF:
15369 case PM_CONTEXT_MODULE_ELSE:
15370 case PM_CONTEXT_MODULE_ENSURE:
15371 case PM_CONTEXT_MODULE_RESCUE:
15372 case PM_CONTEXT_MODULE:
15373 case PM_CONTEXT_MULTI_TARGET:
15374 case PM_CONTEXT_PARENS:
15375 case PM_CONTEXT_PREDICATE:
15376 case PM_CONTEXT_RESCUE_MODIFIER:
15377 case PM_CONTEXT_TERNARY:
15378 case PM_CONTEXT_UNLESS:
15379 // In these contexts we should continue walking up the list of
15380 // contexts.
15381 break;
15382 case PM_CONTEXT_NONE:
15383 // This case should never happen.
15384 assert(false && "unreachable");
15385 break;
15386 }
15387 }
15388}
15389
15394static pm_node_list_t *
15395push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15396 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15397 parser->current_block_exits = current_block_exits;
15398 return previous_block_exits;
15399}
15400
15406static void
15407flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15408 pm_node_t *block_exit;
15409 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15410 const char *type;
15411
15412 switch (PM_NODE_TYPE(block_exit)) {
15413 case PM_BREAK_NODE: type = "break"; break;
15414 case PM_NEXT_NODE: type = "next"; break;
15415 case PM_REDO_NODE: type = "redo"; break;
15416 default: assert(false && "unreachable"); type = ""; break;
15417 }
15418
15419 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15420 }
15421
15422 parser->current_block_exits = previous_block_exits;
15423}
15424
15429static void
15430pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15431 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15432 // If we matched a trailing while/until, then all of the block exits in
15433 // the contained list are valid. In this case we do not need to do
15434 // anything.
15435 parser->current_block_exits = previous_block_exits;
15436 } else if (previous_block_exits != NULL) {
15437 // If we did not matching a trailing while/until, then all of the block
15438 // exits contained in the list are invalid for this specific context.
15439 // However, they could still become valid in a higher level context if
15440 // there is another list above this one. In this case we'll push all of
15441 // the block exits up to the previous list.
15442 pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
15443 parser->current_block_exits = previous_block_exits;
15444 } else {
15445 // If we did not match a trailing while/until and this was the last
15446 // chance to do so, then all of the block exits in the list are invalid
15447 // and we need to add an error for each of them.
15448 flush_block_exits(parser, previous_block_exits);
15449 }
15450}
15451
15452static PRISM_INLINE pm_node_t *
15453parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15454 context_push(parser, PM_CONTEXT_PREDICATE);
15455 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15456 pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
15457
15458 // Predicates are closed by a term, a "then", or a term and then a "then".
15459 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15460
15461 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15462 predicate_closed = true;
15463 *then_keyword = parser->previous;
15464 }
15465
15466 if (!predicate_closed) {
15467 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15468 }
15469
15470 context_pop(parser);
15471 return predicate;
15472}
15473
15474static PRISM_INLINE pm_node_t *
15475parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15476 pm_node_list_t current_block_exits = { 0 };
15477 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15478
15479 pm_token_t keyword = parser->previous;
15480 pm_token_t then_keyword = { 0 };
15481
15482 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15483 pm_statements_node_t *statements = NULL;
15484
15485 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15486 pm_accepts_block_stack_push(parser, true);
15487 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15488 pm_accepts_block_stack_pop(parser);
15489 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15490 }
15491
15492 pm_node_t *parent = NULL;
15493
15494 switch (context) {
15495 case PM_CONTEXT_IF:
15496 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15497 break;
15498 case PM_CONTEXT_UNLESS:
15499 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15500 break;
15501 default:
15502 assert(false && "unreachable");
15503 break;
15504 }
15505
15506 pm_node_t *current = parent;
15507
15508 // Parse any number of elsif clauses. This will form a linked list of if
15509 // nodes pointing to each other from the top.
15510 if (context == PM_CONTEXT_IF) {
15511 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15512 if (parser_end_of_line_p(parser)) {
15513 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15514 }
15515
15516 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15517 pm_token_t elsif_keyword = parser->current;
15518 parser_lex(parser);
15519
15520 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15521 pm_accepts_block_stack_push(parser, true);
15522
15523 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15524 pm_accepts_block_stack_pop(parser);
15525 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15526
15527 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15528 ((pm_if_node_t *) current)->subsequent = elsif;
15529 current = elsif;
15530 }
15531 }
15532
15533 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15534 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15535 opening_newline_index = token_newline_index(parser);
15536
15537 parser_lex(parser);
15538 pm_token_t else_keyword = parser->previous;
15539
15540 pm_accepts_block_stack_push(parser, true);
15541 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15542 pm_accepts_block_stack_pop(parser);
15543
15544 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15545 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15546 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15547
15548 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15549
15550 switch (context) {
15551 case PM_CONTEXT_IF:
15552 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15553 break;
15554 case PM_CONTEXT_UNLESS:
15555 ((pm_unless_node_t *) parent)->else_clause = else_node;
15556 break;
15557 default:
15558 assert(false && "unreachable");
15559 break;
15560 }
15561 } else {
15562 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15563 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15564 }
15565
15566 // Set the appropriate end location for all of the nodes in the subtree.
15567 switch (context) {
15568 case PM_CONTEXT_IF: {
15569 pm_node_t *current = parent;
15570 bool recursing = true;
15571
15572 while (recursing) {
15573 switch (PM_NODE_TYPE(current)) {
15574 case PM_IF_NODE:
15575 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15576 current = ((pm_if_node_t *) current)->subsequent;
15577 recursing = current != NULL;
15578 break;
15579 case PM_ELSE_NODE:
15580 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15581 recursing = false;
15582 break;
15583 default: {
15584 recursing = false;
15585 break;
15586 }
15587 }
15588 }
15589 break;
15590 }
15591 case PM_CONTEXT_UNLESS:
15592 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15593 break;
15594 default:
15595 assert(false && "unreachable");
15596 break;
15597 }
15598
15599 pop_block_exits(parser, previous_block_exits);
15600 return parent;
15601}
15602
/**
 * Case labels for the keyword token types. The macro omits the leading `case`
 * and the trailing `:` so that it is used as `case PM_CASE_KEYWORD:` inside a
 * switch over a token type.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15618
/**
 * Case labels for the operator token types that this file accepts as symbol
 * and method names. Used as `case PM_CASE_OPERATOR:`; the macro omits the
 * leading `case` and the trailing `:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15631
/**
 * Case labels for a set of token types grouped as "primitive": numeric
 * literals, string-like literal openers, and a handful of keywords. Presumably
 * these are the tokens that can begin a primitive value -- confirm against the
 * use sites. The macro omits the leading `case` and the trailing `:`.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15646
/**
 * Case labels for a set of token types grouped as "parameter". Presumably
 * these are the tokens that can begin a parameter -- confirm against the use
 * sites. The macro omits the leading `case` and the trailing `:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: case PM_TOKEN_CLASS_VARIABLE
15655
/**
 * Case labels for a set of node types grouped as "writable". Presumably these
 * are the read nodes that can be converted into write targets -- confirm
 * against the use sites. Unlike the token macros above, this one lists node
 * types. The macro omits the leading `case` and the trailing `:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15664
15665// Assert here that the flags are the same so that we can safely switch the type
15666// of the node without having to move the flags.
15667PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15668
15673static PRISM_INLINE pm_node_flags_t
15674parse_unescaped_encoding(const pm_parser_t *parser) {
15675 if (parser->explicit_encoding != NULL) {
15676 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15677 // If the there's an explicit encoding and it's using a UTF-8 escape
15678 // sequence, then mark the string as UTF-8.
15679 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15680 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15681 // If there's a non-UTF-8 escape sequence being used, then the
15682 // string uses the source encoding, unless the source is marked as
15683 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15684 // order to keep the string valid.
15685 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15686 }
15687 }
15688 return 0;
15689}
15690
15695static pm_node_t *
15696parse_string_part(pm_parser_t *parser, uint16_t depth) {
15697 switch (parser->current.type) {
15698 // Here the lexer has returned to us plain string content. In this case
15699 // we'll create a string node that has no opening or closing and return that
15700 // as the part. These kinds of parts look like:
15701 //
15702 // "aaa #{bbb} #@ccc ddd"
15703 // ^^^^ ^ ^^^^
15704 case PM_TOKEN_STRING_CONTENT: {
15705 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15706 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15707
15708 parser_lex(parser);
15709 return node;
15710 }
15711 // Here the lexer has returned the beginning of an embedded expression. In
15712 // that case we'll parse the inner statements and return that as the part.
15713 // These kinds of parts look like:
15714 //
15715 // "aaa #{bbb} #@ccc ddd"
15716 // ^^^^^^
15717 case PM_TOKEN_EMBEXPR_BEGIN: {
15718 // Ruby disallows seeing encoding around interpolation in strings,
15719 // even though it is known at parse time.
15720 parser->explicit_encoding = NULL;
15721
15722 pm_lex_state_t state = parser->lex_state;
15723 int brace_nesting = parser->brace_nesting;
15724
15725 parser->brace_nesting = 0;
15726 lex_state_set(parser, PM_LEX_STATE_BEG);
15727 parser_lex(parser);
15728
15729 pm_token_t opening = parser->previous;
15730 pm_statements_node_t *statements = NULL;
15731
15732 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15733 pm_accepts_block_stack_push(parser, true);
15734 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15735 pm_accepts_block_stack_pop(parser);
15736 }
15737
15738 parser->brace_nesting = brace_nesting;
15739 lex_state_set(parser, state);
15740 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15741
15742 // If this set of embedded statements only contains a single
15743 // statement, then Ruby does not consider it as a possible statement
15744 // that could emit a line event.
15745 if (statements != NULL && statements->body.size == 1) {
15746 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15747 }
15748
15749 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15750 }
15751
15752 // Here the lexer has returned the beginning of an embedded variable.
15753 // In that case we'll parse the variable and create an appropriate node
15754 // for it and then return that node. These kinds of parts look like:
15755 //
15756 // "aaa #{bbb} #@ccc ddd"
15757 // ^^^^^
15758 case PM_TOKEN_EMBVAR: {
15759 // Ruby disallows seeing encoding around interpolation in strings,
15760 // even though it is known at parse time.
15761 parser->explicit_encoding = NULL;
15762
15763 lex_state_set(parser, PM_LEX_STATE_BEG);
15764 parser_lex(parser);
15765
15766 pm_token_t operator = parser->previous;
15767 pm_node_t *variable;
15768
15769 switch (parser->current.type) {
15770 // In this case a back reference is being interpolated. We'll
15771 // create a global variable read node.
15772 case PM_TOKEN_BACK_REFERENCE:
15773 parser_lex(parser);
15774 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15775 break;
15776 // In this case an nth reference is being interpolated. We'll
15777 // create a global variable read node.
15778 case PM_TOKEN_NUMBERED_REFERENCE:
15779 parser_lex(parser);
15780 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15781 break;
15782 // In this case a global variable is being interpolated. We'll
15783 // create a global variable read node.
15784 case PM_TOKEN_GLOBAL_VARIABLE:
15785 parser_lex(parser);
15786 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15787 break;
15788 // In this case an instance variable is being interpolated.
15789 // We'll create an instance variable read node.
15790 case PM_TOKEN_INSTANCE_VARIABLE:
15791 parser_lex(parser);
15792 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15793 break;
15794 // In this case a class variable is being interpolated. We'll
15795 // create a class variable read node.
15796 case PM_TOKEN_CLASS_VARIABLE:
15797 parser_lex(parser);
15798 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15799 break;
15800 // We can hit here if we got an invalid token. In that case
15801 // we'll not attempt to lex this token and instead just return a
15802 // missing node.
15803 default:
15804 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15805 variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15806 break;
15807 }
15808
15809 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15810 }
15811 default:
15812 parser_lex(parser);
15813 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15814 return NULL;
15815 }
15816}
15817
15823static const uint8_t *
15824parse_operator_symbol_name(const pm_token_t *name) {
15825 switch (name->type) {
15826 case PM_TOKEN_TILDE:
15827 case PM_TOKEN_BANG:
15828 if (name->end[-1] == '@') return name->end - 1;
15830 default:
15831 return name->end;
15832 }
15833}
15834
15835static pm_node_t *
15836parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15837 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15838 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15839
15840 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15841 parser_lex(parser);
15842
15843 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15844 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15845
15846 return UP(symbol);
15847}
15848
15854static pm_node_t *
15855parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15856 const pm_token_t opening = parser->previous;
15857
15858 if (lex_mode->mode != PM_LEX_STRING) {
15859 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15860
15861 switch (parser->current.type) {
15862 case PM_CASE_OPERATOR:
15863 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15864 case PM_TOKEN_IDENTIFIER:
15865 case PM_TOKEN_CONSTANT:
15866 case PM_TOKEN_INSTANCE_VARIABLE:
15867 case PM_TOKEN_METHOD_NAME:
15868 case PM_TOKEN_CLASS_VARIABLE:
15869 case PM_TOKEN_GLOBAL_VARIABLE:
15870 case PM_TOKEN_NUMBERED_REFERENCE:
15871 case PM_TOKEN_BACK_REFERENCE:
15872 case PM_CASE_KEYWORD:
15873 parser_lex(parser);
15874 break;
15875 default:
15876 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15877 break;
15878 }
15879
15880 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15881 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15882 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15883
15884 return UP(symbol);
15885 }
15886
15887 if (lex_mode->as.string.interpolation) {
15888 // If we have the end of the symbol, then we can return an empty symbol.
15889 if (match1(parser, PM_TOKEN_STRING_END)) {
15890 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15891 parser_lex(parser);
15892 pm_token_t content = {
15893 .type = PM_TOKEN_STRING_CONTENT,
15894 .start = parser->previous.start,
15895 .end = parser->previous.start
15896 };
15897
15898 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15899 }
15900
15901 // Now we can parse the first part of the symbol.
15902 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15903
15904 // If we got a string part, then it's possible that we could transform
15905 // what looks like an interpolated symbol into a regular symbol.
15906 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15907 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15908 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15909
15910 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15911 }
15912
15913 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15914 if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15915
15916 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15917 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15918 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15919 }
15920 }
15921
15922 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15923 if (match1(parser, PM_TOKEN_EOF)) {
15924 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15925 } else {
15926 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15927 }
15928
15929 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15930 return UP(symbol);
15931 }
15932
15933 pm_token_t content;
15934 pm_string_t unescaped;
15935
15936 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15937 content = parser->current;
15938 unescaped = parser->current_string;
15939 parser_lex(parser);
15940
15941 // If we have two string contents in a row, then the content of this
15942 // symbol is split because of heredoc contents. This looks like:
15943 //
15944 // <<A; :'a
15945 // A
15946 // b'
15947 //
15948 // In this case, the best way we have to represent this is as an
15949 // interpolated string node, so that's what we'll do here.
15950 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15951 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15952 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15953 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15954
15955 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15956 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15957
15958 if (next_state != PM_LEX_STATE_NONE) {
15959 lex_state_set(parser, next_state);
15960 }
15961
15962 parser_lex(parser);
15963 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15964
15965 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15966 return UP(symbol);
15967 }
15968 } else {
15969 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15970 pm_string_shared_init(&unescaped, content.start, content.end);
15971 }
15972
15973 if (next_state != PM_LEX_STATE_NONE) {
15974 lex_state_set(parser, next_state);
15975 }
15976
15977 if (match1(parser, PM_TOKEN_EOF)) {
15978 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15979 } else {
15980 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15981 }
15982
15983 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15984}
15985
15990static PRISM_INLINE pm_node_t *
15991parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15992 switch (parser->current.type) {
15993 case PM_CASE_OPERATOR:
15994 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
15995 case PM_CASE_KEYWORD:
15996 case PM_TOKEN_CONSTANT:
15997 case PM_TOKEN_IDENTIFIER:
15998 case PM_TOKEN_METHOD_NAME: {
15999 parser_lex(parser);
16000
16001 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16002 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16003 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16004
16005 return UP(symbol);
16006 }
16007 case PM_TOKEN_SYMBOL_BEGIN: {
16008 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16009 parser_lex(parser);
16010
16011 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16012 }
16013 default:
16014 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16015 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16016 }
16017}
16018
16025static PRISM_INLINE pm_node_t *
16026parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16027 switch (parser->current.type) {
16028 case PM_CASE_OPERATOR:
16029 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16030 case PM_CASE_KEYWORD:
16031 case PM_TOKEN_CONSTANT:
16032 case PM_TOKEN_IDENTIFIER:
16033 case PM_TOKEN_METHOD_NAME: {
16034 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16035 parser_lex(parser);
16036
16037 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16038 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16039 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16040
16041 return UP(symbol);
16042 }
16043 case PM_TOKEN_SYMBOL_BEGIN: {
16044 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16045 parser_lex(parser);
16046
16047 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16048 }
16049 case PM_TOKEN_BACK_REFERENCE:
16050 parser_lex(parser);
16051 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
16052 case PM_TOKEN_NUMBERED_REFERENCE:
16053 parser_lex(parser);
16054 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16055 case PM_TOKEN_GLOBAL_VARIABLE:
16056 parser_lex(parser);
16057 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
16058 default:
16059 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16060 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16061 }
16062}
16063
16068static pm_node_t *
16069parse_variable(pm_parser_t *parser) {
16070 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16071 int depth;
16072 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
16073
16074 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16075 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
16076 }
16077
16078 pm_scope_t *current_scope = parser->current_scope;
16079 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16080 if (is_numbered_param) {
16081 // When you use a numbered parameter, it implies the existence of
16082 // all of the locals that exist before it. For example, referencing
16083 // _2 means that _1 must exist. Therefore here we loop through all
16084 // of the possibilities and add them into the constant pool.
16085 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16086 for (uint8_t number = 1; number <= maximum; number++) {
16087 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16088 }
16089
16090 if (!match1(parser, PM_TOKEN_EQUAL)) {
16091 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16092 }
16093
16094 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
16095 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16096
16097 return node;
16098 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16099 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
16100 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16101
16102 return node;
16103 }
16104 }
16105
16106 return NULL;
16107}
16108
16112static pm_node_t *
16113parse_variable_call(pm_parser_t *parser) {
16114 pm_node_flags_t flags = 0;
16115
16116 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16117 pm_node_t *node = parse_variable(parser);
16118 if (node != NULL) return node;
16119 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16120 }
16121
16122 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16123 pm_node_flag_set(UP(node), flags);
16124
16125 return UP(node);
16126}
16127
16134parse_method_definition_name(pm_parser_t *parser) {
16135 switch (parser->current.type) {
16136 case PM_CASE_KEYWORD:
16137 case PM_TOKEN_CONSTANT:
16138 case PM_TOKEN_METHOD_NAME:
16139 parser_lex(parser);
16140 return parser->previous;
16141 case PM_TOKEN_IDENTIFIER:
16142 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
16143 parser_lex(parser);
16144 return parser->previous;
16145 case PM_CASE_OPERATOR:
16146 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16147 parser_lex(parser);
16148 return parser->previous;
16149 default:
16150 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
16151 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
16152 }
16153}
16154
16155static void
16156parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
16157 // Make a writable copy in the arena if the string isn't already writable.
16158 // We keep a mutable pointer to the arena memory so we can memmove into it
16159 // below without casting away const from the string's source field.
16160 uint8_t *writable;
16161
16162 if (string->type != PM_STRING_OWNED) {
16163 size_t length = pm_string_length(string);
16164 writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
16165 pm_string_constant_init(string, (const char *) writable, length);
16166 } else {
16167 writable = (uint8_t *) string->source;
16168 }
16169
16170 // Now get the bounds of the existing string. We'll use this as a
16171 // destination to move bytes into. We'll also use it for bounds checking
16172 // since we don't require that these strings be null terminated.
16173 size_t dest_length = pm_string_length(string);
16174 const uint8_t *source_cursor = writable;
16175 const uint8_t *source_end = source_cursor + dest_length;
16176
16177 // We're going to move bytes backward in the string when we get leading
16178 // whitespace, so we'll maintain a pointer to the current position in the
16179 // string that we're writing to.
16180 size_t trimmed_whitespace = 0;
16181
16182 // While we haven't reached the amount of common whitespace that we need to
16183 // trim and we haven't reached the end of the string, we'll keep trimming
16184 // whitespace. Trimming in this context means skipping over these bytes such
16185 // that they aren't copied into the new string.
16186 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16187 if (*source_cursor == '\t') {
16188 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16189 if (trimmed_whitespace > common_whitespace) break;
16190 } else {
16191 trimmed_whitespace++;
16192 }
16193
16194 source_cursor++;
16195 dest_length--;
16196 }
16197
16198 memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
16199 string->length = dest_length;
16200}
16201
16206static PRISM_INLINE bool
16207heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
16208 if (string_node->unescaped.length == 0) {
16209 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
16210 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
16211 }
16212 return false;
16213}
16214
16218static void
16219parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16220 // The next node should be dedented if it's the first node in the list or if
16221 // it follows a string node.
16222 bool dedent_next = true;
16223
16224 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16225 // keep around two indices: a read and a write.
16226 size_t write_index = 0;
16227
16228 pm_node_t *node;
16229 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16230 // We're not manipulating child nodes that aren't strings. In this case
16231 // we'll skip past it and indicate that the subsequent node should not
16232 // be dedented.
16233 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16234 nodes->nodes[write_index++] = node;
16235 dedent_next = false;
16236 continue;
16237 }
16238
16239 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16240 if (dedent_next) {
16241 parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
16242 }
16243
16244 if (heredoc_dedent_discard_string_node(parser, string_node)) {
16245 } else {
16246 nodes->nodes[write_index++] = node;
16247 }
16248
16249 // We always dedent the next node if it follows a string node.
16250 dedent_next = true;
16251 }
16252
16253 nodes->size = write_index;
16254}
16255
16259static pm_token_t
16260parse_strings_empty_content(const uint8_t *location) {
16261 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16262}
16263
16267static PRISM_INLINE pm_node_t *
16268parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16269 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16270 bool concating = false;
16271
16272 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16273 pm_node_t *node = NULL;
16274
16275 // Here we have found a string literal. We'll parse it and add it to
16276 // the list of strings.
16277 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16278 assert(lex_mode->mode == PM_LEX_STRING);
16279 bool lex_interpolation = lex_mode->as.string.interpolation;
16280 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16281
16282 pm_token_t opening = parser->current;
16283 parser_lex(parser);
16284
16285 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16286 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16287 // If we get here, then we have an end immediately after a
16288 // start. In that case we'll create an empty content token and
16289 // return an uninterpolated string.
16290 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16291 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16292
16293 pm_string_shared_init(&string->unescaped, content.start, content.end);
16294 node = UP(string);
16295 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16296 // If we get here, then we have an end of a label immediately
16297 // after a start. In that case we'll create an empty symbol
16298 // node.
16299 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
16300 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
16301 node = UP(symbol);
16302
16303 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16304 } else if (!lex_interpolation) {
16305 // If we don't accept interpolation then we expect the string to
16306 // start with a single string content node.
16307 pm_string_t unescaped;
16308 pm_token_t content;
16309
16310 if (match1(parser, PM_TOKEN_EOF)) {
16311 unescaped = PM_STRING_EMPTY;
16312 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
16313 } else {
16314 unescaped = parser->current_string;
16315 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16316 content = parser->previous;
16317 }
16318
16319 // It is unfortunately possible to have multiple string content
16320 // nodes in a row in the case that there's heredoc content in
16321 // the middle of the string, like this cursed example:
16322 //
16323 // <<-END+'b
16324 // a
16325 // END
16326 // c'+'d'
16327 //
16328 // In that case we need to switch to an interpolated string to
16329 // be able to contain all of the parts.
16330 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16331 pm_node_list_t parts = { 0 };
16332 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16333 pm_node_list_append(parser->arena, &parts, part);
16334
16335 do {
16336 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
16337 pm_node_list_append(parser->arena, &parts, part);
16338 parser_lex(parser);
16339 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16340
16341 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16342 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16343 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16344 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16345 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16346 } else if (match1(parser, PM_TOKEN_EOF)) {
16347 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16348 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16349 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16350 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16351 } else {
16352 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
16353 parser->previous.start = parser->previous.end;
16354 parser->previous.type = 0;
16355 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16356 }
16357 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16358 // In this case we've hit string content so we know the string
16359 // at least has something in it. We'll need to check if the
16360 // following token is the end (in which case we can return a
16361 // plain string) or if it's not then it has interpolation.
16362 pm_token_t content = parser->current;
16363 pm_string_t unescaped = parser->current_string;
16364 parser_lex(parser);
16365
16366 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16367 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16368 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16369
16370 // Kind of odd behavior, but basically if we have an
16371 // unterminated string and it ends in a newline, we back up one
16372 // character so that the error message is on the last line of
16373 // content in the string.
16374 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16375 const uint8_t *location = parser->previous.end;
16376 if (location > parser->start && location[-1] == '\n') location--;
16377 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
16378
16379 parser->previous.start = parser->previous.end;
16380 parser->previous.type = 0;
16381 }
16382 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16383 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16384 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16385 } else {
16386 // If we get here, then we have interpolation so we'll need
16387 // to create a string or symbol node with interpolation.
16388 pm_node_list_t parts = { 0 };
16389 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
16390 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16391 pm_node_list_append(parser->arena, &parts, part);
16392
16393 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16394 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16395 pm_node_list_append(parser->arena, &parts, part);
16396 }
16397 }
16398
16399 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16400 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16401 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16402 } else if (match1(parser, PM_TOKEN_EOF)) {
16403 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16404 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16405 } else {
16406 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16407 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16408 }
16409 }
16410 } else {
16411 // If we get here, then the first part of the string is not plain
16412 // string content, in which case we need to parse the string as an
16413 // interpolated string.
16414 pm_node_list_t parts = { 0 };
16415 pm_node_t *part;
16416
16417 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16418 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16419 pm_node_list_append(parser->arena, &parts, part);
16420 }
16421 }
16422
16423 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16424 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16425 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16426 } else if (match1(parser, PM_TOKEN_EOF)) {
16427 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16428 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16429 } else {
16430 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16431 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16432 }
16433 }
16434
16435 if (current == NULL) {
16436 // If the node we just parsed is a symbol node, then we can't
16437 // concatenate it with anything else, so we can now return that
16438 // node.
16439 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16440 return node;
16441 }
16442
16443 // If we don't already have a node, then it's fine and we can just
16444 // set the result to be the node we just parsed.
16445 current = node;
16446 } else {
16447 // Otherwise we need to check the type of the node we just parsed.
16448 // If it cannot be concatenated with the previous node, then we'll
16449 // need to add a syntax error.
16450 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16451 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16452 }
16453
16454 // If we haven't already created our container for concatenation,
16455 // we'll do that now.
16456 if (!concating) {
16457 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16458 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16459 }
16460
16461 concating = true;
16462 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16463 pm_interpolated_string_node_append(parser, container, current);
16464 current = UP(container);
16465 }
16466
16467 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
16468 }
16469 }
16470
16471 return current;
16472}
16473
// Flag values passed as the `flags` argument of parse_pattern. TOP and MULTI
// occupy distinct bits and are combined with bitwise OR by callers in this
// file, while SINGLE is the absence of both.
// NOTE(review): presumably SINGLE requests exactly one pattern, TOP marks a
// pattern at the top level of its enclosing construct, and MULTI permits a
// comma-separated list -- confirm against the definition of parse_pattern.
16474#define PM_PARSE_PATTERN_SINGLE 0
16475#define PM_PARSE_PATTERN_TOP 1
16476#define PM_PARSE_PATTERN_MULTI 2
16477
// Forward declaration: the pattern helpers below call parse_pattern before it
// is defined.
16478static pm_node_t *
16479parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16480
16486static void
16487parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16488 // Skip this capture if it starts with an underscore.
16489 if (peek_at(parser, parser->start + location->start) == '_') return;
16490
16491 if (pm_constant_id_list_includes(captures, capture)) {
16492 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16493 } else {
16494 pm_constant_id_list_append(parser->arena, captures, capture);
16495 }
16496}
16497
16501static pm_node_t *
16502parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16503 // Now, if there are any :: operators that follow, parse them as constant
16504 // path nodes.
16505 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16506 pm_token_t delimiter = parser->previous;
16507 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16508 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16509 }
16510
16511 // If there is a [ or ( that follows, then this is part of a larger pattern
16512 // expression. We'll parse the inner pattern here, then modify the returned
16513 // inner pattern with our constant path attached.
16514 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16515 return node;
16516 }
16517
16518 pm_token_t opening;
16519 pm_token_t closing;
16520 pm_node_t *inner = NULL;
16521
16522 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16523 opening = parser->previous;
16524 accept1(parser, PM_TOKEN_NEWLINE);
16525
16526 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16527 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16528 accept1(parser, PM_TOKEN_NEWLINE);
16529 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16530 }
16531
16532 closing = parser->previous;
16533 } else {
16534 parser_lex(parser);
16535 opening = parser->previous;
16536 accept1(parser, PM_TOKEN_NEWLINE);
16537
16538 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16539 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16540 accept1(parser, PM_TOKEN_NEWLINE);
16541 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16542 }
16543
16544 closing = parser->previous;
16545 }
16546
16547 if (!inner) {
16548 // If there was no inner pattern, then we have something like Foo() or
16549 // Foo[]. In that case we'll create an array pattern with no requireds.
16550 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16551 }
16552
16553 // Now that we have the inner pattern, check to see if it's an array, find,
16554 // or hash pattern. If it is, then we'll attach our constant path to it if
16555 // it doesn't already have a constant. If it's not one of those node types
16556 // or it does have a constant, then we'll create an array pattern.
16557 switch (PM_NODE_TYPE(inner)) {
16558 case PM_ARRAY_PATTERN_NODE: {
16559 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16560
16561 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16562 PM_NODE_START_SET_NODE(pattern_node, node);
16563 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16564
16565 pattern_node->constant = node;
16566 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16567 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16568
16569 return UP(pattern_node);
16570 }
16571
16572 break;
16573 }
16574 case PM_FIND_PATTERN_NODE: {
16575 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16576
16577 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16578 PM_NODE_START_SET_NODE(pattern_node, node);
16579 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16580
16581 pattern_node->constant = node;
16582 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16583 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16584
16585 return UP(pattern_node);
16586 }
16587
16588 break;
16589 }
16590 case PM_HASH_PATTERN_NODE: {
16591 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16592
16593 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16594 PM_NODE_START_SET_NODE(pattern_node, node);
16595 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16596
16597 pattern_node->constant = node;
16598 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16599 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16600
16601 return UP(pattern_node);
16602 }
16603
16604 break;
16605 }
16606 default:
16607 break;
16608 }
16609
16610 // If we got here, then we didn't return one of the inner patterns by
16611 // attaching its constant. In this case we'll create an array pattern and
16612 // attach our constant to it.
16613 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16614 pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
16615 return UP(pattern_node);
16616}
16617
16621static pm_splat_node_t *
16622parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16623 assert(parser->previous.type == PM_TOKEN_USTAR);
16624 pm_token_t operator = parser->previous;
16625 pm_node_t *name = NULL;
16626
16627 // Rest patterns don't necessarily have a name associated with them. So we
16628 // will check for that here. If they do, then we'll add it to the local
16629 // table since this pattern will cause it to become a local variable.
16630 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16631 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16632
16633 int depth;
16634 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16635 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16636 }
16637
16638 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16639 name = UP(pm_local_variable_target_node_create(
16640 parser,
16641 &TOK2LOC(parser, &parser->previous),
16642 constant_id,
16643 (uint32_t) (depth == -1 ? 0 : depth)
16644 ));
16645 }
16646
16647 // Finally we can return the created node.
16648 return pm_splat_node_create(parser, &operator, name);
16649}
16650
16654static pm_node_t *
16655parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16656 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16657 parser_lex(parser);
16658
16659 pm_token_t operator = parser->previous;
16660 pm_node_t *value = NULL;
16661
16662 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16663 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16664 }
16665
16666 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16667 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16668
16669 int depth;
16670 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16671 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16672 }
16673
16674 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16675 value = UP(pm_local_variable_target_node_create(
16676 parser,
16677 &TOK2LOC(parser, &parser->previous),
16678 constant_id,
16679 (uint32_t) (depth == -1 ? 0 : depth)
16680 ));
16681 }
16682
16683 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16684}
16685
16690static bool
16691pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16692 ptrdiff_t length = end - start;
16693 if (length == 0) return false;
16694
16695 // First ensure that it starts with a valid identifier starting character.
16696 size_t width = char_is_identifier_start(parser, start, end - start);
16697 if (width == 0) return false;
16698
16699 // Next, ensure that it's not an uppercase character.
16700 if (parser->encoding_changed) {
16701 if (parser->encoding->isupper_char(start, length)) return false;
16702 } else {
16703 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16704 }
16705
16706 // Next, iterate through all of the bytes of the string to ensure that they
16707 // are all valid identifier characters.
16708 const uint8_t *cursor = start + width;
16709 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16710 return cursor == end;
16711}
16712
16717static pm_node_t *
16718parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16719 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16720 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16721 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16722
16723 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16724 int depth = -1;
16725
16726 if (pm_slice_is_valid_local(parser, start, end)) {
16727 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16728 } else {
16729 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16730
16731 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16732 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16733 }
16734 }
16735
16736 if (depth == -1) {
16737 pm_parser_local_add(parser, constant_id, start, end, 0);
16738 }
16739
16740 parse_pattern_capture(parser, captures, constant_id, value_loc);
16741 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16742 parser,
16743 value_loc,
16744 constant_id,
16745 (uint32_t) (depth == -1 ? 0 : depth)
16746 );
16747
16748 return UP(pm_implicit_node_create(parser, UP(target)));
16749}
16750
16755static void
16756parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16757 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16758 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16759 }
16760}
16761
16766parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16767 pm_node_list_t assocs = { 0 };
16768 pm_static_literals_t keys = { 0 };
16769 pm_node_t *rest = NULL;
16770
16771 switch (PM_NODE_TYPE(first_node)) {
16772 case PM_ASSOC_SPLAT_NODE:
16773 case PM_NO_KEYWORDS_PARAMETER_NODE:
16774 rest = first_node;
16775 break;
16776 case PM_INTERPOLATED_SYMBOL_NODE:
16777 case PM_SYMBOL_NODE: {
16778 if (pm_symbol_node_label_p(parser, first_node)) {
16779 if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
16780 pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16781 } else {
16782 parse_pattern_hash_key(parser, &keys, first_node);
16783 }
16784
16785 pm_node_t *value;
16786
16787 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16788 if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
16789 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16790 } else {
16791 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
16792 }
16793 } else {
16794 // Here we have a value for the first assoc in the list, so
16795 // we will parse it now.
16796 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16797 }
16798
16799 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16800 pm_node_list_append(parser->arena, &assocs, assoc);
16801 break;
16802 }
16803 }
16805 default: {
16806 // If we get anything else, then this is an error. For this we'll
16807 // create a missing node for the value and create an assoc node for
16808 // the first node in the list.
16809 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16810 pm_parser_err_node(parser, first_node, diag_id);
16811
16812 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16813 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16814
16815 pm_node_list_append(parser->arena, &assocs, assoc);
16816 break;
16817 }
16818 }
16819
16820 // If there are any other assocs, then we'll parse them now.
16821 while (accept1(parser, PM_TOKEN_COMMA)) {
16822 // Here we need to break to support trailing commas.
16823 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16824 // Trailing commas are not allowed to follow a rest pattern.
16825 if (rest != NULL) {
16826 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16827 }
16828
16829 break;
16830 }
16831
16832 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16833 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16834
16835 if (rest == NULL) {
16836 rest = assoc;
16837 } else {
16838 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16839 pm_node_list_append(parser->arena, &assocs, assoc);
16840 }
16841 } else {
16842 pm_node_t *key;
16843
16844 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16845 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16846
16847 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16848 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16849 } else if (!pm_symbol_node_label_p(parser, key)) {
16850 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16851 }
16852 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16853 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16854 } else {
16855 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16856
16857 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16858 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16859 }
16860
16861 parse_pattern_hash_key(parser, &keys, key);
16862 pm_node_t *value = NULL;
16863
16864 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16865 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16866 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16867 } else {
16868 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
16869 }
16870 } else {
16871 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16872 }
16873
16874 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16875
16876 if (rest != NULL) {
16877 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16878 }
16879
16880 pm_node_list_append(parser->arena, &assocs, assoc);
16881 }
16882 }
16883
16884 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16885 // assocs.nodes is arena-allocated; no explicit free needed.
16886
16887 pm_static_literals_free(&keys);
16888 return node;
16889}
16890
16894static pm_node_t *
16895parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16896 switch (parser->current.type) {
16897 case PM_TOKEN_IDENTIFIER:
16898 case PM_TOKEN_METHOD_NAME: {
16899 parser_lex(parser);
16900 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16901
16902 int depth;
16903 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16904 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16905 }
16906
16907 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16908 return UP(pm_local_variable_target_node_create(
16909 parser,
16910 &TOK2LOC(parser, &parser->previous),
16911 constant_id,
16912 (uint32_t) (depth == -1 ? 0 : depth)
16913 ));
16914 }
16915 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16916 pm_token_t opening = parser->current;
16917 parser_lex(parser);
16918
16919 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16920 // If we have an empty array pattern, then we'll just return a new
16921 // array pattern node.
16922 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16923 }
16924
16925 // Otherwise, we'll parse the inner pattern, then deal with it depending
16926 // on the type it returns.
16927 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16928
16929 accept1(parser, PM_TOKEN_NEWLINE);
16930 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16931 pm_token_t closing = parser->previous;
16932
16933 switch (PM_NODE_TYPE(inner)) {
16934 case PM_ARRAY_PATTERN_NODE: {
16935 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16936 if (pattern_node->opening_loc.length == 0) {
16937 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16938 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16939
16940 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16941 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16942
16943 return UP(pattern_node);
16944 }
16945
16946 break;
16947 }
16948 case PM_FIND_PATTERN_NODE: {
16949 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16950 if (pattern_node->opening_loc.length == 0) {
16951 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16952 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16953
16954 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16955 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16956
16957 return UP(pattern_node);
16958 }
16959
16960 break;
16961 }
16962 default:
16963 break;
16964 }
16965
16966 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16967 pm_array_pattern_node_requireds_append(parser->arena, node, inner);
16968 return UP(node);
16969 }
16970 case PM_TOKEN_BRACE_LEFT: {
16971 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16972 parser->pattern_matching_newlines = false;
16973
16975 pm_token_t opening = parser->current;
16976 parser_lex(parser);
16977
16978 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16979 // If we have an empty hash pattern, then we'll just return a new hash
16980 // pattern node.
16981 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16982 } else {
16983 pm_node_t *first_node;
16984
16985 switch (parser->current.type) {
16986 case PM_TOKEN_LABEL:
16987 parser_lex(parser);
16988 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16989 break;
16990 case PM_TOKEN_USTAR_STAR:
16991 first_node = parse_pattern_keyword_rest(parser, captures);
16992 break;
16993 case PM_TOKEN_STRING_BEGIN:
16994 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16995 break;
16996 default: {
16997 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
16998 parser_lex(parser);
16999
17000 first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
17001 break;
17002 }
17003 }
17004
17005 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17006
17007 accept1(parser, PM_TOKEN_NEWLINE);
17008 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
17009 pm_token_t closing = parser->previous;
17010
17011 PM_NODE_START_SET_TOKEN(parser, node, &opening);
17012 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
17013
17014 node->opening_loc = TOK2LOC(parser, &opening);
17015 node->closing_loc = TOK2LOC(parser, &closing);
17016 }
17017
17018 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17019 return UP(node);
17020 }
17021 case PM_TOKEN_UDOT_DOT:
17022 case PM_TOKEN_UDOT_DOT_DOT: {
17023 pm_token_t operator = parser->current;
17024 parser_lex(parser);
17025
17026 // Since we have a unary range operator, we need to parse the subsequent
17027 // expression as the right side of the range.
17028 switch (parser->current.type) {
17029 case PM_CASE_PRIMITIVE: {
17030 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17031 return UP(pm_range_node_create(parser, NULL, &operator, right));
17032 }
17033 default: {
17034 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17035 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17036 return UP(pm_range_node_create(parser, NULL, &operator, right));
17037 }
17038 }
17039 }
17040 case PM_CASE_PRIMITIVE: {
17041 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
17042
17043 // If we found a label, we need to immediately return to the caller.
17044 if (pm_symbol_node_label_p(parser, node)) return node;
17045
17046 // Call nodes (arithmetic operations) are not allowed in patterns
17047 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17048 pm_parser_err_node(parser, node, diag_id);
17049 return UP(pm_error_recovery_node_create_unexpected(parser, node));
17050 }
17051
17052 // Now that we have a primitive, we need to check if it's part of a range.
17053 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17054 pm_token_t operator = parser->previous;
17055
17056 // Now that we have the operator, we need to check if this is followed
17057 // by another expression. If it is, then we will create a full range
17058 // node. Otherwise, we'll create an endless range.
17059 switch (parser->current.type) {
17060 case PM_CASE_PRIMITIVE: {
17061 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17062 return UP(pm_range_node_create(parser, node, &operator, right));
17063 }
17064 default:
17065 return UP(pm_range_node_create(parser, node, &operator, NULL));
17066 }
17067 }
17068
17069 return node;
17070 }
17071 case PM_TOKEN_CARET: {
17072 parser_lex(parser);
17073 pm_token_t operator = parser->previous;
17074
17075 // At this point we have a pin operator. We need to check the subsequent
17076 // expression to determine if it's a variable or an expression.
17077 switch (parser->current.type) {
17078 case PM_TOKEN_IDENTIFIER: {
17079 parser_lex(parser);
17080 pm_node_t *variable = UP(parse_variable(parser));
17081
17082 if (variable == NULL) {
17083 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17084 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
17085 }
17086
17087 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17088 }
17089 case PM_TOKEN_INSTANCE_VARIABLE: {
17090 parser_lex(parser);
17091 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
17092
17093 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17094 }
17095 case PM_TOKEN_CLASS_VARIABLE: {
17096 parser_lex(parser);
17097 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17098
17099 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17100 }
17101 case PM_TOKEN_GLOBAL_VARIABLE: {
17102 parser_lex(parser);
17103 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17104
17105 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17106 }
17107 case PM_TOKEN_NUMBERED_REFERENCE: {
17108 parser_lex(parser);
17109 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17110
17111 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17112 }
17113 case PM_TOKEN_BACK_REFERENCE: {
17114 parser_lex(parser);
17115 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17116
17117 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17118 }
17119 case PM_TOKEN_PARENTHESIS_LEFT: {
17120 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17121 parser->pattern_matching_newlines = false;
17122
17123 pm_token_t lparen = parser->current;
17124 parser_lex(parser);
17125
17126 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17127 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17128
17129 accept1(parser, PM_TOKEN_NEWLINE);
17130 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
17131 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
17132 }
17133 default: {
17134 // If we get here, then we have a pin operator followed by something
17135 // not understood. We'll create a missing node and return that.
17136 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17137 pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17138 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17139 }
17140 }
17141 }
17142 case PM_TOKEN_UCOLON_COLON: {
17143 pm_token_t delimiter = parser->current;
17144 parser_lex(parser);
17145
17146 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17147 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17148
17149 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
17150 }
17151 case PM_TOKEN_CONSTANT: {
17152 pm_token_t constant = parser->current;
17153 parser_lex(parser);
17154
17155 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
17156 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17157 }
17158 default:
17159 pm_parser_err_current(parser, diag_id);
17160 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17161 }
17162}
17163
17164static bool
17165parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
17166 switch (PM_NODE_TYPE(node)) {
17167 case PM_LOCAL_VARIABLE_TARGET_NODE: {
17168 pm_parser_t *parser = (pm_parser_t *) data;
17169 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
17170 return false;
17171 }
17172 default:
17173 return true;
17174 }
17175}
17176
17181static void
17182parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
17183 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
17184}
17185
17190static pm_node_t *
17191parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17192 pm_node_t *node = first_node;
17193 bool alternation = false;
17194
17195 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
17196 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
17197 parse_pattern_alternation_error(parser, node);
17198 }
17199
17200 switch (parser->current.type) {
17201 case PM_TOKEN_IDENTIFIER:
17202 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17203 case PM_TOKEN_BRACE_LEFT:
17204 case PM_TOKEN_CARET:
17205 case PM_TOKEN_CONSTANT:
17206 case PM_TOKEN_UCOLON_COLON:
17207 case PM_TOKEN_UDOT_DOT:
17208 case PM_TOKEN_UDOT_DOT_DOT:
17209 case PM_CASE_PRIMITIVE: {
17210 if (!alternation) {
17211 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17212 } else {
17213 pm_token_t operator = parser->previous;
17214 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17215
17216 if (captures->size) parse_pattern_alternation_error(parser, right);
17217 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17218 }
17219
17220 break;
17221 }
17222 case PM_TOKEN_PARENTHESIS_LEFT:
17223 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17224 pm_token_t operator = parser->previous;
17225 pm_token_t opening = parser->current;
17226 parser_lex(parser);
17227
17228 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17229 accept1(parser, PM_TOKEN_NEWLINE);
17230 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
17231 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
17232
17233 if (!alternation) {
17234 node = right;
17235 } else {
17236 if (captures->size) parse_pattern_alternation_error(parser, right);
17237 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17238 }
17239
17240 break;
17241 }
17242 default: {
17243 pm_parser_err_current(parser, diag_id);
17244 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17245
17246 if (!alternation) {
17247 node = right;
17248 } else {
17249 if (captures->size) parse_pattern_alternation_error(parser, right);
17250 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
17251 }
17252
17253 break;
17254 }
17255 }
17256 }
17257
17258 // If we have an =>, then we are assigning this pattern to a variable.
17259 // In this case we should create an assignment node.
17260 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17261 pm_token_t operator = parser->previous;
17262 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17263
17264 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17265 int depth;
17266
17267 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17268 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17269 }
17270
17271 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
17272 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17273 parser,
17274 &TOK2LOC(parser, &parser->previous),
17275 constant_id,
17276 (uint32_t) (depth == -1 ? 0 : depth)
17277 );
17278
17279 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
17280 }
17281
17282 return node;
17283}
17284
17288static pm_node_t *
17289parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17290 pm_node_t *node = NULL;
17291
17292 bool leading_rest = false;
17293 bool trailing_rest = false;
17294
17295 switch (parser->current.type) {
17296 case PM_TOKEN_LABEL: {
17297 parser_lex(parser);
17298 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
17299 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
17300
17301 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17302 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17303 }
17304
17305 return node;
17306 }
17307 case PM_TOKEN_USTAR_STAR: {
17308 node = parse_pattern_keyword_rest(parser, captures);
17309 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17310
17311 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17312 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17313 }
17314
17315 return node;
17316 }
17317 case PM_TOKEN_STRING_BEGIN: {
17318 // We need special handling for string beginnings because they could
17319 // be dynamic symbols leading to hash patterns.
17320 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17321
17322 if (pm_symbol_node_label_p(parser, node)) {
17323 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17324
17325 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17326 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17327 }
17328
17329 return node;
17330 }
17331
17332 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17333 break;
17334 }
17335 case PM_TOKEN_USTAR: {
17336 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17337 parser_lex(parser);
17338 node = UP(parse_pattern_rest(parser, captures));
17339 leading_rest = true;
17340 break;
17341 }
17342 }
17344 default:
17345 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17346 break;
17347 }
17348
17349 // If we got a dynamic label symbol, then we need to treat it like the
17350 // beginning of a hash pattern.
17351 if (pm_symbol_node_label_p(parser, node)) {
17352 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17353 }
17354
17355 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17356 // If we have a comma, then we are now parsing either an array pattern
17357 // or a find pattern. We need to parse all of the patterns, put them
17358 // into a big list, and then determine which type of node we have.
17359 pm_node_list_t nodes = { 0 };
17360 pm_node_list_append(parser->arena, &nodes, node);
17361
17362 // Gather up all of the patterns into the list.
17363 while (accept1(parser, PM_TOKEN_COMMA)) {
17364 // Break early here in case we have a trailing comma.
17365 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17366 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17367 pm_node_list_append(parser->arena, &nodes, node);
17368 trailing_rest = true;
17369 break;
17370 }
17371
17372 if (accept1(parser, PM_TOKEN_USTAR)) {
17373 node = UP(parse_pattern_rest(parser, captures));
17374
17375 // If we have already parsed a splat pattern, then this is an
17376 // error. We will continue to parse the rest of the patterns,
17377 // but we will indicate it as an error.
17378 if (trailing_rest) {
17379 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17380 }
17381
17382 trailing_rest = true;
17383 } else {
17384 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17385 }
17386
17387 pm_node_list_append(parser->arena, &nodes, node);
17388 }
17389
17390 // If the first pattern and the last pattern are rest patterns, then we
17391 // will call this a find pattern, regardless of how many rest patterns
17392 // are in between because we know we already added the appropriate
17393 // errors. Otherwise we will create an array pattern.
17394 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17395 node = UP(pm_find_pattern_node_create(parser, &nodes));
17396
17397 if (nodes.size == 2) {
17398 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17399 }
17400 } else {
17401 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17402
17403 if (leading_rest && trailing_rest) {
17404 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17405 }
17406 }
17407
17408 // nodes.nodes is arena-allocated; no explicit free needed.
17409 } else if (leading_rest) {
17410 // Otherwise, if we parsed a single splat pattern, then we know we have
17411 // an array pattern, so we can go ahead and create that node.
17412 node = UP(pm_array_pattern_node_rest_create(parser, node));
17413 }
17414
17415 return node;
17416}
17417
17423static PRISM_INLINE void
17424parse_negative_numeric(pm_node_t *node) {
17425 switch (PM_NODE_TYPE(node)) {
17426 case PM_INTEGER_NODE: {
17427 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17428 cast->base.location.start--;
17429 cast->base.location.length++;
17430 cast->value.negative = true;
17431 break;
17432 }
17433 case PM_FLOAT_NODE: {
17434 pm_float_node_t *cast = (pm_float_node_t *) node;
17435 cast->base.location.start--;
17436 cast->base.location.length++;
17437 cast->value = -cast->value;
17438 break;
17439 }
17440 case PM_RATIONAL_NODE: {
17441 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17442 cast->base.location.start--;
17443 cast->base.location.length++;
17444 cast->numerator.negative = true;
17445 break;
17446 }
17447 case PM_IMAGINARY_NODE:
17448 node->location.start--;
17449 node->location.length++;
17450 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17451 break;
17452 default:
17453 assert(false && "unreachable");
17454 break;
17455 }
17456}
17457
17463static void
17464pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17465 switch (diag_id) {
17466 case PM_ERR_HASH_KEY: {
17467 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
17468 break;
17469 }
17470 case PM_ERR_HASH_VALUE:
17471 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17472 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17473 break;
17474 }
17475 case PM_ERR_UNARY_RECEIVER: {
17476 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
17477 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17478 break;
17479 }
17480 case PM_ERR_UNARY_DISALLOWED:
17481 case PM_ERR_EXPECT_ARGUMENT: {
17482 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17483 break;
17484 }
17485 default:
17486 pm_parser_err_previous(parser, diag_id);
17487 break;
17488 }
17489}
17490
17494static void
17495parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17496#define CONTEXT_NONE 0
17497#define CONTEXT_THROUGH_ENSURE 1
17498#define CONTEXT_THROUGH_ELSE 2
17499
17500 pm_context_node_t *context_node = parser->current_context;
17501 int context = CONTEXT_NONE;
17502
17503 while (context_node != NULL) {
17504 switch (context_node->context) {
17505 case PM_CONTEXT_BEGIN_RESCUE:
17506 case PM_CONTEXT_BLOCK_RESCUE:
17507 case PM_CONTEXT_CLASS_RESCUE:
17508 case PM_CONTEXT_DEF_RESCUE:
17509 case PM_CONTEXT_LAMBDA_RESCUE:
17510 case PM_CONTEXT_MODULE_RESCUE:
17511 case PM_CONTEXT_SCLASS_RESCUE:
17512 case PM_CONTEXT_DEFINED:
17513 case PM_CONTEXT_RESCUE_MODIFIER:
17514 // These are the good cases. We're allowed to have a retry here.
17515 return;
17516 case PM_CONTEXT_CLASS:
17517 case PM_CONTEXT_DEF:
17518 case PM_CONTEXT_DEF_PARAMS:
17519 case PM_CONTEXT_MAIN:
17520 case PM_CONTEXT_MODULE:
17521 case PM_CONTEXT_PREEXE:
17522 case PM_CONTEXT_SCLASS:
17523 // These are the bad cases. We're not allowed to have a retry in
17524 // these contexts.
17525 if (context == CONTEXT_NONE) {
17526 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17527 } else if (context == CONTEXT_THROUGH_ENSURE) {
17528 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17529 } else if (context == CONTEXT_THROUGH_ELSE) {
17530 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17531 }
17532 return;
17533 case PM_CONTEXT_BEGIN_ELSE:
17534 case PM_CONTEXT_BLOCK_ELSE:
17535 case PM_CONTEXT_CLASS_ELSE:
17536 case PM_CONTEXT_DEF_ELSE:
17537 case PM_CONTEXT_LAMBDA_ELSE:
17538 case PM_CONTEXT_MODULE_ELSE:
17539 case PM_CONTEXT_SCLASS_ELSE:
17540 // These are also bad cases, but with a more specific error
17541 // message indicating the else.
17542 context = CONTEXT_THROUGH_ELSE;
17543 break;
17544 case PM_CONTEXT_BEGIN_ENSURE:
17545 case PM_CONTEXT_BLOCK_ENSURE:
17546 case PM_CONTEXT_CLASS_ENSURE:
17547 case PM_CONTEXT_DEF_ENSURE:
17548 case PM_CONTEXT_LAMBDA_ENSURE:
17549 case PM_CONTEXT_MODULE_ENSURE:
17550 case PM_CONTEXT_SCLASS_ENSURE:
17551 // These are also bad cases, but with a more specific error
17552 // message indicating the ensure.
17553 context = CONTEXT_THROUGH_ENSURE;
17554 break;
17555 case PM_CONTEXT_NONE:
17556 // This case should never happen.
17557 assert(false && "unreachable");
17558 break;
17559 case PM_CONTEXT_BEGIN:
17560 case PM_CONTEXT_BLOCK_BRACES:
17561 case PM_CONTEXT_BLOCK_KEYWORDS:
17562 case PM_CONTEXT_BLOCK_PARAMETERS:
17563 case PM_CONTEXT_CASE_IN:
17564 case PM_CONTEXT_CASE_WHEN:
17565 case PM_CONTEXT_DEFAULT_PARAMS:
17566 case PM_CONTEXT_ELSE:
17567 case PM_CONTEXT_ELSIF:
17568 case PM_CONTEXT_EMBEXPR:
17569 case PM_CONTEXT_FOR_INDEX:
17570 case PM_CONTEXT_FOR:
17571 case PM_CONTEXT_IF:
17572 case PM_CONTEXT_LAMBDA_BRACES:
17573 case PM_CONTEXT_LAMBDA_DO_END:
17574 case PM_CONTEXT_LOOP_PREDICATE:
17575 case PM_CONTEXT_MULTI_TARGET:
17576 case PM_CONTEXT_PARENS:
17577 case PM_CONTEXT_POSTEXE:
17578 case PM_CONTEXT_PREDICATE:
17579 case PM_CONTEXT_TERNARY:
17580 case PM_CONTEXT_UNLESS:
17581 case PM_CONTEXT_UNTIL:
17582 case PM_CONTEXT_WHILE:
17583 // In these contexts we should continue walking up the list of
17584 // contexts.
17585 break;
17586 }
17587
17588 context_node = context_node->prev;
17589 }
17590
17591#undef CONTEXT_NONE
17592#undef CONTEXT_ENSURE
17593#undef CONTEXT_ELSE
17594}
17595
17599static void
17600parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17601 pm_context_node_t *context_node = parser->current_context;
17602
17603 while (context_node != NULL) {
17604 switch (context_node->context) {
17605 case PM_CONTEXT_DEF:
17606 case PM_CONTEXT_DEF_PARAMS:
17607 case PM_CONTEXT_DEFINED:
17608 case PM_CONTEXT_DEF_ENSURE:
17609 case PM_CONTEXT_DEF_RESCUE:
17610 case PM_CONTEXT_DEF_ELSE:
17611 // These are the good cases. We're allowed to have a block exit
17612 // in these contexts.
17613 return;
17614 case PM_CONTEXT_CLASS:
17615 case PM_CONTEXT_CLASS_ENSURE:
17616 case PM_CONTEXT_CLASS_RESCUE:
17617 case PM_CONTEXT_CLASS_ELSE:
17618 case PM_CONTEXT_MAIN:
17619 case PM_CONTEXT_MODULE:
17620 case PM_CONTEXT_MODULE_ENSURE:
17621 case PM_CONTEXT_MODULE_RESCUE:
17622 case PM_CONTEXT_MODULE_ELSE:
17623 case PM_CONTEXT_SCLASS:
17624 case PM_CONTEXT_SCLASS_RESCUE:
17625 case PM_CONTEXT_SCLASS_ENSURE:
17626 case PM_CONTEXT_SCLASS_ELSE:
17627 // These are the bad cases. We're not allowed to have a retry in
17628 // these contexts.
17629 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17630 return;
17631 case PM_CONTEXT_NONE:
17632 // This case should never happen.
17633 assert(false && "unreachable");
17634 break;
17635 case PM_CONTEXT_BEGIN:
17636 case PM_CONTEXT_BEGIN_ELSE:
17637 case PM_CONTEXT_BEGIN_ENSURE:
17638 case PM_CONTEXT_BEGIN_RESCUE:
17639 case PM_CONTEXT_BLOCK_BRACES:
17640 case PM_CONTEXT_BLOCK_KEYWORDS:
17641 case PM_CONTEXT_BLOCK_ELSE:
17642 case PM_CONTEXT_BLOCK_ENSURE:
17643 case PM_CONTEXT_BLOCK_PARAMETERS:
17644 case PM_CONTEXT_BLOCK_RESCUE:
17645 case PM_CONTEXT_CASE_IN:
17646 case PM_CONTEXT_CASE_WHEN:
17647 case PM_CONTEXT_DEFAULT_PARAMS:
17648 case PM_CONTEXT_ELSE:
17649 case PM_CONTEXT_ELSIF:
17650 case PM_CONTEXT_EMBEXPR:
17651 case PM_CONTEXT_FOR_INDEX:
17652 case PM_CONTEXT_FOR:
17653 case PM_CONTEXT_IF:
17654 case PM_CONTEXT_LAMBDA_BRACES:
17655 case PM_CONTEXT_LAMBDA_DO_END:
17656 case PM_CONTEXT_LAMBDA_ELSE:
17657 case PM_CONTEXT_LAMBDA_ENSURE:
17658 case PM_CONTEXT_LAMBDA_RESCUE:
17659 case PM_CONTEXT_LOOP_PREDICATE:
17660 case PM_CONTEXT_MULTI_TARGET:
17661 case PM_CONTEXT_PARENS:
17662 case PM_CONTEXT_POSTEXE:
17663 case PM_CONTEXT_PREDICATE:
17664 case PM_CONTEXT_PREEXE:
17665 case PM_CONTEXT_RESCUE_MODIFIER:
17666 case PM_CONTEXT_TERNARY:
17667 case PM_CONTEXT_UNLESS:
17668 case PM_CONTEXT_UNTIL:
17669 case PM_CONTEXT_WHILE:
17670 // In these contexts we should continue walking up the list of
17671 // contexts.
17672 break;
17673 }
17674
17675 context_node = context_node->prev;
17676 }
17677}
17678
17683static PRISM_INLINE bool
17684pm_call_node_command_p(const pm_call_node_t *node) {
17685 return (
17686 (node->opening_loc.length == 0) &&
17687 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
17688 (node->arguments != NULL || node->block != NULL)
17689 );
17690}
17691
17697static bool
17698pm_command_call_value_p(const pm_node_t *node) {
17699 switch (PM_NODE_TYPE(node)) {
17700 case PM_CALL_NODE: {
17701 const pm_call_node_t *call = (const pm_call_node_t *) node;
17702
17703 // Command-style calls (e.g., foo bar, obj.foo bar).
17704 // Attribute writes (e.g., a.b = 1) are not commands.
17705 if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
17706 return true;
17707 }
17708
17709 // A `!` or `not` prefix wrapping a command call (e.g.,
17710 // `!foo bar`, `not foo bar`) is also a command-call value.
17711 if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
17712 return pm_command_call_value_p(call->receiver);
17713 }
17714
17715 return false;
17716 }
17717 case PM_SUPER_NODE: {
17718 const pm_super_node_t *cast = (const pm_super_node_t *) node;
17719 return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
17720 }
17721 case PM_YIELD_NODE: {
17722 const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
17723 return cast->lparen_loc.length == 0 && cast->arguments != NULL;
17724 }
17725 case PM_RESCUE_MODIFIER_NODE:
17726 return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
17727 case PM_DEF_NODE: {
17728 const pm_def_node_t *cast = (const pm_def_node_t *) node;
17729 if (cast->equal_loc.length > 0 && cast->body != NULL) {
17730 const pm_node_t *body = cast->body;
17731 if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
17732 body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1];
17733 }
17734 return pm_command_call_value_p(body);
17735 }
17736 return false;
17737 }
17738 default:
17739 return false;
17740 }
17741}
17742
17749static bool
17750pm_block_call_p(const pm_node_t *node) {
17751 while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17752 const pm_call_node_t *call = (const pm_call_node_t *) node;
17753 if (call->opening_loc.length > 0) return false;
17754
17755 // Root: command with do-block (e.g., `foo bar do end`).
17756 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
17757 return true;
17758 }
17759
17760 // Walk up the receiver chain (e.g., `foo bar do end.baz`).
17761 if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
17762 node = call->receiver;
17763 continue;
17764 }
17765
17766 return false;
17767 }
17768
17769 return false;
17770}
17771
17776static pm_node_t *
17777parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
17778 size_t opening_newline_index = token_newline_index(parser);
17779 parser_lex(parser);
17780
17781 pm_token_t case_keyword = parser->previous;
17782 pm_node_t *predicate = NULL;
17783
17784 pm_node_list_t current_block_exits = { 0 };
17785 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17786
17787 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17788 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17789 predicate = NULL;
17790 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
17791 predicate = NULL;
17792 } else if (!token_begins_expression_p(parser->current.type)) {
17793 predicate = NULL;
17794 } else {
17795 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
17796 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17797 }
17798
17799 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
17800 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17801 parser_lex(parser);
17802 pop_block_exits(parser, previous_block_exits);
17803 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17804 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
17805 }
17806
17807 /* At this point we can create a case node, though we don't yet know if it
17808 * is a case-in or case-when node. */
17809 pm_node_t *node;
17810
17811 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17812 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
17813 pm_static_literals_t literals = { 0 };
17814
17815 /* At this point we've seen a when keyword, so we know this is a
17816 * case-when node. We will continue to parse the when nodes until we hit
17817 * the end of the list. */
17818 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17819 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17820 parser_lex(parser);
17821
17822 pm_token_t when_keyword = parser->previous;
17823 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
17824
17825 do {
17826 if (accept1(parser, PM_TOKEN_USTAR)) {
17827 pm_token_t operator = parser->previous;
17828 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17829
17830 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
17831 pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
17832
17833 if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
17834 } else {
17835 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
17836 pm_when_node_conditions_append(parser->arena, when_node, condition);
17837
17838 /* If we found a missing node, then this is a syntax error
17839 * and we should stop looping. */
17840 if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
17841
17842 /* If this is a string node, then we need to mark it as
17843 * frozen because when clause strings are frozen. */
17844 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17845 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
17846 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
17847 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17848 }
17849
17850 pm_when_clause_static_literals_add(parser, &literals, condition);
17851 }
17852 } while (accept1(parser, PM_TOKEN_COMMA));
17853
17854 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17855 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17856 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17857 }
17858 } else {
17859 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
17860 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17861 }
17862
17863 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17864 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
17865 if (statements != NULL) {
17866 pm_when_node_statements_set(when_node, statements);
17867 }
17868 }
17869
17870 pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
17871 }
17872
17873 /* If we didn't parse any conditions (in or when) then we need to
17874 * indicate that we have an error. */
17875 if (case_node->conditions.size == 0) {
17876 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17877 }
17878
17879 pm_static_literals_free(&literals);
17880 node = UP(case_node);
17881 } else {
17882 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
17883
17884 /* If this is a case-match node (i.e., it is a pattern matching case
17885 * statement) then we must have a predicate. */
17886 if (predicate == NULL) {
17887 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
17888 }
17889
17890 /* At this point we expect that we're parsing a case-in node. We will
17891 * continue to parse the in nodes until we hit the end of the list. */
17892 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
17893 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17894
17895 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17896 parser->pattern_matching_newlines = true;
17897
17898 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
17899 parser->command_start = false;
17900 parser_lex(parser);
17901
17902 pm_token_t in_keyword = parser->previous;
17903
17904 pm_constant_id_list_t captures = { 0 };
17905 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
17906
17907 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17908
17909 /* Since we're in the top-level of the case-in node we need to
17910 * check for guard clauses in the form of `if` or `unless`
17911 * statements. */
17912 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
17913 pm_token_t keyword = parser->previous;
17914 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
17915 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
17916 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
17917 pm_token_t keyword = parser->previous;
17918 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
17919 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
17920 }
17921
17922 /* Now we need to check for the terminator of the in node's pattern.
17923 * It can be a newline or semicolon optionally followed by a `then`
17924 * keyword. */
17925 pm_token_t then_keyword = { 0 };
17926 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17927 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17928 then_keyword = parser->previous;
17929 }
17930 } else {
17931 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17932 then_keyword = parser->previous;
17933 }
17934
17935 /* Now we can actually parse the statements associated with the in
17936 * node. */
17937 pm_statements_node_t *statements;
17938 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17939 statements = NULL;
17940 } else {
17941 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
17942 }
17943
17944 /* Now that we have the full pattern and statements, we can create
17945 * the node and attach it to the case node. */
17946 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
17947 pm_case_match_node_condition_append(parser->arena, case_node, condition);
17948 }
17949
17950 /* If we didn't parse any conditions (in or when) then we need to
17951 * indicate that we have an error. */
17952 if (case_node->conditions.size == 0) {
17953 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17954 }
17955
17956 node = UP(case_node);
17957 }
17958
17959 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
17960 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
17961 pm_token_t else_keyword = parser->previous;
17962 pm_else_node_t *else_node;
17963
17964 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
17965 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
17966 } else {
17967 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
17968 }
17969
17970 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17971 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
17972 } else {
17973 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
17974 }
17975 }
17976
17977 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17978 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
17979
17980 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17981 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
17982 } else {
17983 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
17984 }
17985
17986 pop_block_exits(parser, previous_block_exits);
17987 return node;
17988}
17989
17994static pm_node_t *
17995parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
17996 size_t opening_newline_index = token_newline_index(parser);
17997 parser_lex(parser);
17998
17999 pm_token_t class_keyword = parser->previous;
18000 pm_do_loop_stack_push(parser, false);
18001
18002 pm_node_list_t current_block_exits = { 0 };
18003 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18004
18005 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18006 pm_token_t operator = parser->previous;
18007 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18008
18009 pm_parser_scope_push(parser, true);
18010 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18011 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
18012 }
18013
18014 pm_node_t *statements = NULL;
18015 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18016 pm_accepts_block_stack_push(parser, true);
18017 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18018 pm_accepts_block_stack_pop(parser);
18019 }
18020
18021 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18022 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18023 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18024 } else {
18025 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18026 }
18027
18028 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18029
18030 pm_constant_id_list_t locals;
18031 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18032
18033 pm_parser_scope_pop(parser);
18034 pm_do_loop_stack_pop(parser);
18035
18036 flush_block_exits(parser, previous_block_exits);
18037 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18038 }
18039
18040 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18041 pm_token_t name = parser->previous;
18042 if (name.type != PM_TOKEN_CONSTANT) {
18043 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18044 }
18045
18046 pm_token_t inheritance_operator = { 0 };
18047 pm_node_t *superclass;
18048
18049 if (match1(parser, PM_TOKEN_LESS)) {
18050 inheritance_operator = parser->current;
18051 lex_state_set(parser, PM_LEX_STATE_BEG);
18052
18053 parser->command_start = true;
18054 parser_lex(parser);
18055
18056 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18057 } else {
18058 superclass = NULL;
18059 }
18060
18061 pm_parser_scope_push(parser, true);
18062
18063 if (inheritance_operator.start != NULL) {
18064 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18065 } else {
18066 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18067 }
18068 pm_node_t *statements = NULL;
18069
18070 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18071 pm_accepts_block_stack_push(parser, true);
18072 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18073 pm_accepts_block_stack_pop(parser);
18074 }
18075
18076 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18077 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18078 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18079 } else {
18080 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18081 }
18082
18083 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18084
18085 if (context_def_p(parser)) {
18086 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18087 }
18088
18089 pm_constant_id_list_t locals;
18090 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18091
18092 pm_parser_scope_pop(parser);
18093 pm_do_loop_stack_pop(parser);
18094
18095 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18096 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18097 if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18098 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18099 }
18100 }
18101
18102 pop_block_exits(parser, previous_block_exits);
18103 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18104}
18105
18109static pm_node_t *
18110parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
18111 pm_node_list_t current_block_exits = { 0 };
18112 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18113
18114 pm_token_t def_keyword = parser->current;
18115 size_t opening_newline_index = token_newline_index(parser);
18116
18117 pm_node_t *receiver = NULL;
18118 pm_token_t operator = { 0 };
18119 pm_token_t name;
18120
18121 /* This context is necessary for lexing `...` in a bare params correctly. It
18122 * must be pushed before lexing the first param, so it is here. */
18123 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18124 parser_lex(parser);
18125
18126 /* This will be false if the method name is not a valid identifier but could
18127 * be followed by an operator. */
18128 bool valid_name = true;
18129
18130 switch (parser->current.type) {
18131 case PM_CASE_OPERATOR:
18132 pm_parser_scope_push(parser, true);
18133 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18134 parser_lex(parser);
18135
18136 name = parser->previous;
18137 break;
18138 case PM_TOKEN_IDENTIFIER: {
18139 parser_lex(parser);
18140
18141 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18142 receiver = parse_variable_call(parser);
18143
18144 pm_parser_scope_push(parser, true);
18145 lex_state_set(parser, PM_LEX_STATE_FNAME);
18146 parser_lex(parser);
18147
18148 operator = parser->previous;
18149 name = parse_method_definition_name(parser);
18150 } else {
18151 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18152 pm_parser_scope_push(parser, true);
18153
18154 name = parser->previous;
18155 }
18156
18157 break;
18158 }
18159 case PM_TOKEN_INSTANCE_VARIABLE:
18160 case PM_TOKEN_CLASS_VARIABLE:
18161 case PM_TOKEN_GLOBAL_VARIABLE:
18162 valid_name = false;
18164 case PM_TOKEN_CONSTANT:
18165 case PM_TOKEN_KEYWORD_NIL:
18166 case PM_TOKEN_KEYWORD_SELF:
18167 case PM_TOKEN_KEYWORD_TRUE:
18168 case PM_TOKEN_KEYWORD_FALSE:
18169 case PM_TOKEN_KEYWORD___FILE__:
18170 case PM_TOKEN_KEYWORD___LINE__:
18171 case PM_TOKEN_KEYWORD___ENCODING__: {
18172 pm_parser_scope_push(parser, true);
18173 parser_lex(parser);
18174
18175 pm_token_t identifier = parser->previous;
18176
18177 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18178 lex_state_set(parser, PM_LEX_STATE_FNAME);
18179 parser_lex(parser);
18180 operator = parser->previous;
18181
18182 switch (identifier.type) {
18183 case PM_TOKEN_CONSTANT:
18184 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18185 break;
18186 case PM_TOKEN_INSTANCE_VARIABLE:
18187 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18188 break;
18189 case PM_TOKEN_CLASS_VARIABLE:
18190 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18191 break;
18192 case PM_TOKEN_GLOBAL_VARIABLE:
18193 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18194 break;
18195 case PM_TOKEN_KEYWORD_NIL:
18196 receiver = UP(pm_nil_node_create(parser, &identifier));
18197 break;
18198 case PM_TOKEN_KEYWORD_SELF:
18199 receiver = UP(pm_self_node_create(parser, &identifier));
18200 break;
18201 case PM_TOKEN_KEYWORD_TRUE:
18202 receiver = UP(pm_true_node_create(parser, &identifier));
18203 break;
18204 case PM_TOKEN_KEYWORD_FALSE:
18205 receiver = UP(pm_false_node_create(parser, &identifier));
18206 break;
18207 case PM_TOKEN_KEYWORD___FILE__:
18208 receiver = UP(pm_source_file_node_create(parser, &identifier));
18209 break;
18210 case PM_TOKEN_KEYWORD___LINE__:
18211 receiver = UP(pm_source_line_node_create(parser, &identifier));
18212 break;
18213 case PM_TOKEN_KEYWORD___ENCODING__:
18214 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18215 break;
18216 default:
18217 break;
18218 }
18219
18220 name = parse_method_definition_name(parser);
18221 } else {
18222 if (!valid_name) {
18223 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
18224 }
18225
18226 name = identifier;
18227 }
18228 break;
18229 }
18230 case PM_TOKEN_PARENTHESIS_LEFT: {
18231 /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
18232 * expression of this parenthesis should not be processed under this
18233 * context. Thus, the context is popped here. */
18234 context_pop(parser);
18235 parser_lex(parser);
18236
18237 pm_token_t lparen = parser->previous;
18238 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18239
18240 accept1(parser, PM_TOKEN_NEWLINE);
18241 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18242 pm_token_t rparen = parser->previous;
18243
18244 lex_state_set(parser, PM_LEX_STATE_FNAME);
18245 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18246
18247 operator = parser->previous;
18248 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18249
18250 /* To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as
18251 * described the above. */
18252 pm_parser_scope_push(parser, true);
18253 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18254 name = parse_method_definition_name(parser);
18255 break;
18256 }
18257 default:
18258 pm_parser_scope_push(parser, true);
18259 name = parse_method_definition_name(parser);
18260 break;
18261 }
18262
18263 pm_token_t lparen = { 0 };
18264 pm_token_t rparen = { 0 };
18265 pm_parameters_node_t *params;
18266
18267 bool accept_endless_def = true;
18268 switch (parser->current.type) {
18269 case PM_TOKEN_PARENTHESIS_LEFT: {
18270 parser_lex(parser);
18271 lparen = parser->previous;
18272
18273 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18274 params = NULL;
18275 } else {
18276 /* https://bugs.ruby-lang.org/issues/19107 */
18277 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18278 params = parse_parameters(
18279 parser,
18280 PM_BINDING_POWER_DEFINED,
18281 true,
18282 allow_trailing_comma,
18283 true,
18284 true,
18285 false,
18286 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18287 (uint16_t) (depth + 1)
18288 );
18289 }
18290
18291 lex_state_set(parser, PM_LEX_STATE_BEG);
18292 parser->command_start = true;
18293
18294 context_pop(parser);
18295 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18296 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
18297 parser->previous.start = parser->previous.end;
18298 parser->previous.type = 0;
18299 }
18300
18301 rparen = parser->previous;
18302 break;
18303 }
18304 case PM_CASE_PARAMETER: {
18305 /* If we're about to lex a label, we need to add the label state to
18306 * make sure the next newline is ignored. */
18307 if (parser->current.type == PM_TOKEN_LABEL) {
18308 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18309 }
18310
18311 params = parse_parameters(
18312 parser,
18313 PM_BINDING_POWER_DEFINED,
18314 false,
18315 false,
18316 true,
18317 true,
18318 false,
18319 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18320 (uint16_t) (depth + 1)
18321 );
18322
18323 /* Reject `def * = 1` and similar. We have to specifically check for
18324 * them because they create ambiguity with optional arguments. */
18325 accept_endless_def = false;
18326
18327 context_pop(parser);
18328 break;
18329 }
18330 default: {
18331 params = NULL;
18332 context_pop(parser);
18333 break;
18334 }
18335 }
18336
18337 pm_node_t *statements = NULL;
18338 pm_token_t equal = { 0 };
18339 pm_token_t end_keyword = { 0 };
18340
18341 if (accept1(parser, PM_TOKEN_EQUAL)) {
18342 if (token_is_setter_name(&name)) {
18343 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18344 }
18345 if (!accept_endless_def) {
18346 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18347 }
18348 if (
18349 parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
18350 parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
18351 ) {
18352 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18353 }
18354 equal = parser->previous;
18355
18356 context_push(parser, PM_CONTEXT_DEF);
18357 pm_do_loop_stack_push(parser, false);
18358 statements = UP(pm_statements_node_create(parser));
18359
18360 uint8_t allow_flags;
18361 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18362 allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
18363 } else {
18364 /* Allow `def foo = puts "Hello"` but not
18365 * `private def foo = puts "Hello"` */
18366 allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
18367 }
18368
18369 /* Inside a def body, we push true onto the accepts_block_stack so that
18370 * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
18371 * for primary-level constructs, not commands). During command argument
18372 * parsing, the stack is pushed to false, causing `do` to be lexed as
18373 * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
18374 * def body and instead left for the outer context. */
18375 pm_accepts_block_stack_push(parser, true);
18376 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18377 pm_accepts_block_stack_pop(parser);
18378
18379 /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
18380 * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
18381 * intentionally not caught here — it should bubble up to the outer
18382 * context (e.g., `private def f = puts "Hello" do end` where the block
18383 * attaches to `private`). */
18384 if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
18385 pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
18386 pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
18387 }
18388
18389 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18390 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18391
18392 pm_token_t rescue_keyword = parser->previous;
18393
18394 /* In the Ruby grammar, the rescue value of an endless method
18395 * command excludes and/or and in/=>. */
18396 pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18397 context_pop(parser);
18398
18399 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18400 }
18401
18402 /* A nested endless def whose body is a command call (e.g.,
18403 * `def f = def g = foo bar`) is a command assignment and cannot appear
18404 * as a def body. */
18405 if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
18406 PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18407 }
18408
18409 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18410 pm_do_loop_stack_pop(parser);
18411 context_pop(parser);
18412 } else {
18413 if (lparen.start == NULL) {
18414 lex_state_set(parser, PM_LEX_STATE_BEG);
18415 parser->command_start = true;
18416 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18417 } else {
18418 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18419 }
18420
18421 pm_accepts_block_stack_push(parser, true);
18422 pm_do_loop_stack_push(parser, false);
18423
18424 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18425 pm_accepts_block_stack_push(parser, true);
18426 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18427 pm_accepts_block_stack_pop(parser);
18428 }
18429
18430 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18431 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18432 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18433 } else {
18434 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18435 }
18436
18437 pm_accepts_block_stack_pop(parser);
18438 pm_do_loop_stack_pop(parser);
18439
18440 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18441 end_keyword = parser->previous;
18442 }
18443
18444 pm_constant_id_list_t locals;
18445 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18446 pm_parser_scope_pop(parser);
18447
18448 /* If the final character is `@` as is the case when defining methods to
18449 * override the unary operators, we should ignore the @ in the same way we
18450 * do for symbols. */
18451 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
18452
18453 flush_block_exits(parser, previous_block_exits);
18454
18455 return UP(pm_def_node_create(
18456 parser,
18457 name_id,
18458 &name,
18459 receiver,
18460 params,
18461 statements,
18462 &locals,
18463 &def_keyword,
18464 NTOK2PTR(operator),
18465 NTOK2PTR(lparen),
18466 NTOK2PTR(rparen),
18467 NTOK2PTR(equal),
18468 NTOK2PTR(end_keyword)
18469 ));
18470}
18471
18475static pm_node_t *
18476parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
18477 pm_node_list_t current_block_exits = { 0 };
18478 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18479
18480 size_t opening_newline_index = token_newline_index(parser);
18481 parser_lex(parser);
18482 pm_token_t module_keyword = parser->previous;
18483
18484 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
18485 pm_token_t name;
18486
18487 /* If we can recover from a syntax error that occurred while parsing the
18488 * name of the module, then we'll handle that here. */
18489 if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18490 pop_block_exits(parser, previous_block_exits);
18491
18492 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18493 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
18494 }
18495
18496 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
18497 pm_token_t double_colon = parser->previous;
18498
18499 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18500 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
18501 }
18502
18503 /* Here we retrieve the name of the module. If it wasn't a constant, then
18504 * it's possible that `module foo` was passed, which is a syntax error. We
18505 * handle that here as well. */
18506 name = parser->previous;
18507 if (name.type != PM_TOKEN_CONSTANT) {
18508 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
18509 }
18510
18511 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18512 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18513 }
18514
18515 pm_parser_scope_push(parser, true);
18516 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
18517 pm_node_t *statements = NULL;
18518
18519 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18520 pm_accepts_block_stack_push(parser, true);
18521 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
18522 pm_accepts_block_stack_pop(parser);
18523 }
18524
18525 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18526 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18527 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
18528 } else {
18529 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
18530 }
18531
18532 pm_constant_id_list_t locals;
18533 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18534
18535 pm_parser_scope_pop(parser);
18536 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
18537
18538 if (context_def_p(parser)) {
18539 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
18540 }
18541
18542 pop_block_exits(parser, previous_block_exits);
18543
18544 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
18545}
18546
18550static pm_node_t *
18551parse_string_array(pm_parser_t *parser, uint16_t depth) {
18552 parser_lex(parser);
18553 pm_token_t opening = parser->previous;
18554 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18555
18556 /* This is the current node that we are parsing that will be added to the
18557 * list of elements. */
18558 pm_node_t *current = NULL;
18559
18560 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18561 switch (parser->current.type) {
18562 case PM_TOKEN_WORDS_SEP: {
18563 /* Reset the explicit encoding if we hit a separator since each
18564 * element can have its own encoding. */
18565 parser->explicit_encoding = NULL;
18566
18567 if (current == NULL) {
18568 /* If we hit a separator before we have any content, then we
18569 * don't need to do anything. */
18570 } else {
18571 /* If we hit a separator after we've hit content, then we
18572 * need to append that content to the list and reset the
18573 * current node. */
18574 pm_array_node_elements_append(parser->arena, array, current);
18575 current = NULL;
18576 }
18577
18578 parser_lex(parser);
18579 break;
18580 }
18581 case PM_TOKEN_STRING_CONTENT: {
18582 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18583 pm_node_flag_set(string, parse_unescaped_encoding(parser));
18584 parser_lex(parser);
18585
18586 if (current == NULL) {
18587 /* If we hit content and the current node is NULL, then this
18588 * is the first string content we've seen. In that case
18589 * we're going to create a new string node and set that to
18590 * the current. */
18591 current = string;
18592 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18593 /* If we hit string content and the current node is an
18594 * interpolated string, then we need to append the string
18595 * content to the list of child nodes. */
18596 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
18597 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18598 /* If we hit string content and the current node is a string
18599 * node, then we need to convert the current node into an
18600 * interpolated string and add the string content to the
18601 * list of child nodes. */
18602 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18603 pm_interpolated_string_node_append(parser, interpolated, current);
18604 pm_interpolated_string_node_append(parser, interpolated, string);
18605 current = UP(interpolated);
18606 } else {
18607 assert(false && "unreachable");
18608 }
18609
18610 break;
18611 }
18612 case PM_TOKEN_EMBVAR: {
18613 if (current == NULL) {
18614 /* If we hit an embedded variable and the current node is
18615 * NULL, then this is the start of a new string. We'll set
18616 * the current node to a new interpolated string. */
18617 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18618 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18619 /* If we hit an embedded variable and the current node is a
18620 * string node, then we'll convert the current into an
18621 * interpolated string and add the string node to the list
18622 * of parts. */
18623 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18624 pm_interpolated_string_node_append(parser, interpolated, current);
18625 current = UP(interpolated);
18626 } else {
18627 /* If we hit an embedded variable and the current node is an
18628 * interpolated string, then we'll just add the embedded
18629 * variable. */
18630 }
18631
18632 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18633 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18634 break;
18635 }
18636 case PM_TOKEN_EMBEXPR_BEGIN: {
18637 if (current == NULL) {
18638 /* If we hit an embedded expression and the current node is
18639 * NULL, then this is the start of a new string. We'll set
18640 * the current node to a new interpolated string. */
18641 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18642 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18643 /* If we hit an embedded expression and the current node is
18644 * a string node, then we'll convert the current into an
18645 * interpolated string and add the string node to the list
18646 * of parts. */
18647 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18648 pm_interpolated_string_node_append(parser, interpolated, current);
18649 current = UP(interpolated);
18650 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18651 /* If we hit an embedded expression and the current node is
18652 * an interpolated string, then we'll just continue on. */
18653 } else {
18654 assert(false && "unreachable");
18655 }
18656
18657 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18658 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18659 break;
18660 }
18661 default:
18662 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
18663 parser_lex(parser);
18664 break;
18665 }
18666 }
18667
18668 /* If we have a current node, then we need to append it to the list. */
18669 if (current) {
18670 pm_array_node_elements_append(parser->arena, array, current);
18671 }
18672
18673 pm_token_t closing = parser->current;
18674 if (match1(parser, PM_TOKEN_EOF)) {
18675 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
18676 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18677 } else {
18678 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
18679 }
18680
18681 pm_array_node_close_set(parser, array, &closing);
18682 return UP(array);
18683}
18684
18688static pm_node_t *
18689parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
18690 parser_lex(parser);
18691 pm_token_t opening = parser->previous;
18692 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18693
18694 /* This is the current node that we are parsing that will be added to the
18695 * list of elements. */
18696 pm_node_t *current = NULL;
18697
18698 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18699 switch (parser->current.type) {
18700 case PM_TOKEN_WORDS_SEP: {
18701 if (current == NULL) {
18702 /* If we hit a separator before we have any content, then we
18703 * don't need to do anything. */
18704 } else {
18705 /* If we hit a separator after we've hit content, then we
18706 * need to append that content to the list and reset the
18707 * current node. */
18708 pm_array_node_elements_append(parser->arena, array, current);
18709 current = NULL;
18710 }
18711
18712 parser_lex(parser);
18713 break;
18714 }
18715 case PM_TOKEN_STRING_CONTENT: {
18716 if (current == NULL) {
18717 /* If we hit content and the current node is NULL, then this
18718 * is the first string content we've seen. In that case
18719 * we're going to create a new string node and set that to
18720 * the current. */
18721 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
18722 parser_lex(parser);
18723 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18724 /* If we hit string content and the current node is an
18725 * interpolated string, then we need to append the string
18726 * content to the list of child nodes. */
18727 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18728 parser_lex(parser);
18729
18730 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
18731 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18732 /* If we hit string content and the current node is a symbol
18733 * node, then we need to convert the current node into an
18734 * interpolated string and add the string content to the
18735 * list of child nodes. */
18736 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
18737 pm_token_t content = {
18738 .type = PM_TOKEN_STRING_CONTENT,
18739 .start = parser->start + cast->value_loc.start,
18740 .end = parser->start + cast->value_loc.start + cast->value_loc.length
18741 };
18742
18743 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
18744 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
18745 parser_lex(parser);
18746
18747 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18748 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
18749 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
18750
18751 current = UP(interpolated);
18752 } else {
18753 assert(false && "unreachable");
18754 }
18755
18756 break;
18757 }
18758 case PM_TOKEN_EMBVAR: {
18759 bool start_location_set = false;
18760 if (current == NULL) {
18761 /* If we hit an embedded variable and the current node is
18762 * NULL, then this is the start of a new string. We'll set
18763 * the current node to a new interpolated string. */
18764 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18765 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18766 /* If we hit an embedded variable and the current node is a
18767 * string node, then we'll convert the current into an
18768 * interpolated string and add the string node to the list
18769 * of parts. */
18770 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18771
18772 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18773 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18774 PM_NODE_START_SET_NODE(interpolated, current);
18775 start_location_set = true;
18776 current = UP(interpolated);
18777 } else {
18778 /* If we hit an embedded variable and the current node is an
18779 * interpolated string, then we'll just add the embedded
18780 * variable. */
18781 }
18782
18783 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18784 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18785 if (!start_location_set) {
18786 PM_NODE_START_SET_NODE(current, part);
18787 }
18788 break;
18789 }
18790 case PM_TOKEN_EMBEXPR_BEGIN: {
18791 bool start_location_set = false;
18792 if (current == NULL) {
18793 /* If we hit an embedded expression and the current node is
18794 * NULL, then this is the start of a new string. We'll set
18795 * the current node to a new interpolated string. */
18796 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18797 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18798 /* If we hit an embedded expression and the current node is
18799 * a string node, then we'll convert the current into an
18800 * interpolated string and add the string node to the list
18801 * of parts. */
18802 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18803
18804 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18805 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18806 PM_NODE_START_SET_NODE(interpolated, current);
18807 start_location_set = true;
18808 current = UP(interpolated);
18809 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18810 /* If we hit an embedded expression and the current node is
18811 * an interpolated string, then we'll just continue on. */
18812 } else {
18813 assert(false && "unreachable");
18814 }
18815
18816 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18817 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18818 if (!start_location_set) {
18819 PM_NODE_START_SET_NODE(current, part);
18820 }
18821 break;
18822 }
18823 default:
18824 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
18825 parser_lex(parser);
18826 break;
18827 }
18828 }
18829
18830 /* If we have a current node, then we need to append it to the list. */
18831 if (current) {
18832 pm_array_node_elements_append(parser->arena, array, current);
18833 }
18834
18835 pm_token_t closing = parser->current;
18836 if (match1(parser, PM_TOKEN_EOF)) {
18837 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
18838 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18839 } else {
18840 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
18841 }
18842 pm_array_node_close_set(parser, array, &closing);
18843
18844 return UP(array);
18845}
18846
18851static pm_node_t *
18852parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
18853 pm_token_t opening = parser->current;
18854 pm_node_flags_t paren_flags = 0;
18855
18856 pm_node_list_t current_block_exits = { 0 };
18857 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18858
18859 parser_lex(parser);
18860 while (true) {
18861 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18862 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18863 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18864 break;
18865 }
18866 }
18867
18868 /* If this is the end of the file or we match a right parenthesis, then we
18869 * have an empty parentheses node, and we can immediately return. */
18870 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18871 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18872 pop_block_exits(parser, previous_block_exits);
18873 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
18874 }
18875
18876 /* Otherwise, we're going to parse the first statement in the list of
18877 * statements within the parentheses. */
18878 pm_accepts_block_stack_push(parser, true);
18879 context_push(parser, PM_CONTEXT_PARENS);
18880 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18881 context_pop(parser);
18882
18883 /* Determine if this statement is followed by a terminator. In the case of a
18884 * single statement, this is fine. But in the case of multiple statements
18885 * it's required. */
18886 bool terminator_found = false;
18887
18888 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18889 terminator_found = true;
18890 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18891 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18892 terminator_found = true;
18893 }
18894
18895 if (terminator_found) {
18896 while (true) {
18897 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18898 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18899 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18900 break;
18901 }
18902 }
18903 }
18904
18905 /* If we hit a right parenthesis, then we're done parsing the parentheses
18906 * node, and we can check which kind of node we should return. */
18907 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18908 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18909 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18910 }
18911
18912 parser_lex(parser);
18913 pm_accepts_block_stack_pop(parser);
18914 pop_block_exits(parser, previous_block_exits);
18915
18916 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18917 /* If we have a single statement and are ending on a right
18918 * parenthesis, then we need to check if this is possibly a multiple
18919 * target node. */
18920 pm_multi_target_node_t *multi_target;
18921
18922 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
18923 multi_target = (pm_multi_target_node_t *) statement;
18924 } else {
18925 multi_target = pm_multi_target_node_create(parser);
18926 pm_multi_target_node_targets_append(parser, multi_target, statement);
18927 }
18928
18929 multi_target->lparen_loc = TOK2LOC(parser, &opening);
18930 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
18931 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
18932 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
18933
18934 pm_node_t *result;
18935 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18936 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18937 accept1(parser, PM_TOKEN_NEWLINE);
18938 } else {
18939 result = UP(multi_target);
18940 }
18941
18942 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18943 /* All set, this is explicitly allowed by the parent context. */
18944 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18945 /* All set, we're inside a for loop and we're parsing multiple
18946 * targets. */
18947 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18948 /* Multi targets are not allowed when it's not a statement
18949 * level. */
18950 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18951 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18952 /* Multi targets must be followed by an equal sign in order to
18953 * be valid (or a right parenthesis if they are nested). */
18954 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18955 }
18956
18957 return result;
18958 }
18959
18960 /* If we have a single statement and are ending on a right parenthesis
18961 * and we didn't return a multiple assignment node, then we can return a
18962 * regular parentheses node now. */
18963 pm_statements_node_t *statements = pm_statements_node_create(parser);
18964 pm_statements_node_body_append(parser, statements, statement, true);
18965
18966 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
18967 }
18968
18969 /* If we have more than one statement in the set of parentheses, then we are
18970 * going to parse all of them as a list of statements. We'll do that here.
18971 */
18972 context_push(parser, PM_CONTEXT_PARENS);
18973 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18974
18975 pm_statements_node_t *statements = pm_statements_node_create(parser);
18976 pm_statements_node_body_append(parser, statements, statement, true);
18977
18978 /* If we didn't find a terminator and we didn't find a right parenthesis,
18979 * then this is a syntax error. */
18980 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18981 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18982 }
18983
18984 /* Parse each statement within the parentheses. */
18985 while (true) {
18986 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18987 pm_statements_node_body_append(parser, statements, node, true);
18988
18989 /* If we're recovering from a syntax error, then we need to stop parsing
18990 * the statements now. */
18991 if (parser->recovering) {
18992 /* If this is the level of context where the recovery has happened,
18993 * then we can mark the parser as done recovering. */
18994 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18995 break;
18996 }
18997
18998 /* If we couldn't parse an expression at all, then we need to bail out
18999 * of the loop. */
19000 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
19001
19002 /* If we successfully parsed a statement, then we are going to need a
19003 * terminator to delimit them. */
19004 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19005 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
19006 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
19007 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19008 break;
19009 } else if (!match1(parser, PM_TOKEN_EOF)) {
19010 /* If we're at the end of the file, then we're going to add an error
19011 * after this for the ) anyway. */
19012 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
19013 }
19014 }
19015
19016 context_pop(parser);
19017 pm_accepts_block_stack_pop(parser);
19018 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19019
19020 /* When we're parsing multi targets, we allow them to be followed by a right
19021 * parenthesis if they are at the statement level. This is only possible if
19022 * they are the final statement in a parentheses. We need to explicitly
19023 * reject that here. */
19024 {
19025 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
19026
19027 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
19028 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
19029 pm_multi_target_node_targets_append(parser, multi_target, statement);
19030
19031 statement = UP(multi_target);
19032 statements->body.nodes[statements->body.size - 1] = statement;
19033 }
19034
19035 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
19036 const uint8_t *offset = parser->start + PM_NODE_END(statement);
19037 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
19038 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
19039
19040 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
19041 statements->body.nodes[statements->body.size - 1] = statement;
19042
19043 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
19044 }
19045 }
19046
19047 pop_block_exits(parser, previous_block_exits);
19048 pm_void_statements_check(parser, statements, true);
19049 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
19050}
19051
19055static PRISM_INLINE pm_node_t *
19056parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
19057 switch (parser->current.type) {
19058 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
19059 parser_lex(parser);
19060
19061 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
19062 pm_accepts_block_stack_push(parser, true);
19063 bool parsed_bare_hash = false;
19064
19065 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
19066 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
19067
19068 // Handle the case where we don't have a comma and we have a
19069 // newline followed by a right bracket.
19070 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19071 break;
19072 }
19073
19074 // Ensure that we have a comma between elements in the array.
19075 if (array->elements.size > 0) {
19076 if (accept1(parser, PM_TOKEN_COMMA)) {
19077 // If there was a comma but we also accepts a newline,
19078 // then this is a syntax error.
19079 if (accepted_newline) {
19080 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
19081 }
19082 } else {
19083 // If there was no comma, then we need to add a syntax
19084 // error.
19085 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
19086 parser->previous.start = parser->previous.end;
19087 parser->previous.type = 0;
19088 }
19089 }
19090
19091 // If we have a right bracket immediately following a comma,
19092 // this is allowed since it's a trailing comma. In this case we
19093 // can break out of the loop.
19094 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
19095
19096 pm_node_t *element;
19097
19098 if (accept1(parser, PM_TOKEN_USTAR)) {
19099 pm_token_t operator = parser->previous;
19100 pm_node_t *expression = NULL;
19101
19102 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
19103 pm_parser_scope_forwarding_positionals_check(parser, &operator);
19104 } else {
19105 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19106 }
19107
19108 element = UP(pm_splat_node_create(parser, &operator, expression));
19109 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
19110 if (parsed_bare_hash) {
19111 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
19112 }
19113
19114 element = UP(pm_keyword_hash_node_create(parser));
19115 pm_static_literals_t hash_keys = { 0 };
19116
19117 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
19118 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19119 }
19120
19121 pm_static_literals_free(&hash_keys);
19122 parsed_bare_hash = true;
19123 } else {
19124 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
19125
19126 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
19127 if (parsed_bare_hash) {
19128 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
19129 }
19130
19131 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
19132 pm_static_literals_t hash_keys = { 0 };
19133 pm_hash_key_static_literals_add(parser, &hash_keys, element);
19134
19135 pm_token_t operator = { 0 };
19136 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
19137 operator = parser->previous;
19138 }
19139
19140 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
19141 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
19142 pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
19143
19144 element = UP(hash);
19145 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19146 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19147 }
19148
19149 pm_static_literals_free(&hash_keys);
19150 parsed_bare_hash = true;
19151 }
19152 }
19153
19154 pm_array_node_elements_append(parser->arena, array, element);
19155 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
19156 }
19157
19158 accept1(parser, PM_TOKEN_NEWLINE);
19159
19160 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19161 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
19162 parser->previous.start = parser->previous.end;
19163 parser->previous.type = 0;
19164 }
19165
19166 pm_array_node_close_set(parser, array, &parser->previous);
19167 pm_accepts_block_stack_pop(parser);
19168
19169 return UP(array);
19170 }
19171 case PM_TOKEN_PARENTHESIS_LEFT:
19172 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
19173 return parse_parentheses(parser, binding_power, depth);
19174 case PM_TOKEN_BRACE_LEFT: {
19175 // If we were passed a current_hash_keys via the parser, then that
19176 // means we're already parsing a hash and we want to share the set
19177 // of hash keys with this inner hash we're about to parse for the
19178 // sake of warnings. We'll set it to NULL after we grab it to make
19179 // sure subsequent expressions don't use it. Effectively this is a
19180 // way of getting around passing it to every call to
19181 // parse_expression.
19182 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
19183 parser->current_hash_keys = NULL;
19184
19185 pm_accepts_block_stack_push(parser, true);
19186 parser_lex(parser);
19187
19188 pm_token_t opening = parser->previous;
19189 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
19190
19191 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
19192 if (current_hash_keys != NULL) {
19193 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
19194 } else {
19195 pm_static_literals_t hash_keys = { 0 };
19196 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
19197 pm_static_literals_free(&hash_keys);
19198 }
19199
19200 accept1(parser, PM_TOKEN_NEWLINE);
19201 }
19202
19203 pm_accepts_block_stack_pop(parser);
19204 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
19205 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
19206
19207 return UP(node);
19208 }
19209 case PM_TOKEN_CHARACTER_LITERAL: {
19210 pm_node_t *node = UP(pm_string_node_create_current_string(
19211 parser,
19212 &(pm_token_t) {
19213 .type = PM_TOKEN_STRING_BEGIN,
19214 .start = parser->current.start,
19215 .end = parser->current.start + 1
19216 },
19217 &(pm_token_t) {
19218 .type = PM_TOKEN_STRING_CONTENT,
19219 .start = parser->current.start + 1,
19220 .end = parser->current.end
19221 },
19222 NULL
19223 ));
19224
19225 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19226
19227 // Skip past the character literal here, since now we have handled
19228 // parser->explicit_encoding correctly.
19229 parser_lex(parser);
19230
19231 // Characters can be followed by strings in which case they are
19232 // automatically concatenated.
19233 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19234 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19235 }
19236
19237 return node;
19238 }
19239 case PM_TOKEN_CLASS_VARIABLE: {
19240 parser_lex(parser);
19241 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
19242
19243 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19244 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19245 }
19246
19247 return node;
19248 }
19249 case PM_TOKEN_CONSTANT: {
19250 parser_lex(parser);
19251 pm_token_t constant = parser->previous;
19252
19253 // If a constant is immediately followed by parentheses, then this is in
19254 // fact a method call, not a constant read.
19255 if (
19256 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
19257 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19258 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19259 match1(parser, PM_TOKEN_BRACE_LEFT)
19260 ) {
19261 pm_arguments_t arguments = { 0 };
19262 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19263 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
19264 }
19265
19266 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
19267
19268 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19269 // If we get here, then we have a comma immediately following a
19270 // constant, so we're going to parse this as a multiple assignment.
19271 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19272 }
19273
19274 return node;
19275 }
19276 case PM_TOKEN_UCOLON_COLON: {
19277 parser_lex(parser);
19278 pm_token_t delimiter = parser->previous;
19279
19280 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19281 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
19282
19283 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19284 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19285 }
19286
19287 return node;
19288 }
19289 case PM_TOKEN_UDOT_DOT:
19290 case PM_TOKEN_UDOT_DOT_DOT: {
19291 pm_token_t operator = parser->current;
19292 parser_lex(parser);
19293
19294 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
19295
19296 // Unary .. and ... are special because these are non-associative
19297 // operators that can also be unary operators. In this case we need
19298 // to explicitly reject code that has a .. or ... that follows this
19299 // expression.
19300 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
19301 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
19302 }
19303
19304 return UP(pm_range_node_create(parser, NULL, &operator, right));
19305 }
19306 case PM_TOKEN_FLOAT:
19307 parser_lex(parser);
19308 return UP(pm_float_node_create(parser, &parser->previous));
19309 case PM_TOKEN_FLOAT_IMAGINARY:
19310 parser_lex(parser);
19311 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
19312 case PM_TOKEN_FLOAT_RATIONAL:
19313 parser_lex(parser);
19314 return UP(pm_float_node_rational_create(parser, &parser->previous));
19315 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
19316 parser_lex(parser);
19317 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
19318 case PM_TOKEN_NUMBERED_REFERENCE: {
19319 parser_lex(parser);
19320 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
19321
19322 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19323 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19324 }
19325
19326 return node;
19327 }
19328 case PM_TOKEN_GLOBAL_VARIABLE: {
19329 parser_lex(parser);
19330 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
19331
19332 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19333 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19334 }
19335
19336 return node;
19337 }
19338 case PM_TOKEN_BACK_REFERENCE: {
19339 parser_lex(parser);
19340 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
19341
19342 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19343 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19344 }
19345
19346 return node;
19347 }
19348 case PM_TOKEN_IDENTIFIER:
19349 case PM_TOKEN_METHOD_NAME: {
19350 parser_lex(parser);
19351 pm_token_t identifier = parser->previous;
19352 pm_node_t *node = parse_variable_call(parser);
19353
19354 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
19355 // If parse_variable_call returned with a call node, then we
19356 // know the identifier is not in the local table. In that case
19357 // we need to check if there are arguments following the
19358 // identifier.
19359 pm_call_node_t *call = (pm_call_node_t *) node;
19360 pm_arguments_t arguments = { 0 };
19361
19362 if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
19363 // Since we found arguments, we need to turn off the
19364 // variable call bit in the flags.
19365 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
19366
19367 call->opening_loc = arguments.opening_loc;
19368 call->arguments = arguments.arguments;
19369 call->closing_loc = arguments.closing_loc;
19370 call->block = arguments.block;
19371
19372 const pm_location_t *end = pm_arguments_end(&arguments);
19373 if (end == NULL) {
19374 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
19375 } else {
19376 PM_NODE_LENGTH_SET_LOCATION(call, end);
19377 }
19378 }
19379 } else {
19380 // Otherwise, we know the identifier is in the local table. This
19381 // can still be a method call if it is followed by arguments or
19382 // a block, so we need to check for that here.
19383 if (
19384 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19385 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19386 match1(parser, PM_TOKEN_BRACE_LEFT)
19387 ) {
19388 pm_arguments_t arguments = { 0 };
19389 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19390 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
19391
19392 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
19393 // If we're about to convert an 'it' implicit local
19394 // variable read into a method call, we need to remove
19395 // it from the list of implicit local variables.
19396 pm_node_unreference(parser, node);
19397 } else {
19398 // Otherwise, we're about to convert a regular local
19399 // variable read into a method call, in which case we
19400 // need to indicate that this was not a read for the
19401 // purposes of warnings.
19402 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
19403
19404 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
19405 pm_node_unreference(parser, node);
19406 } else {
19408 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
19409 }
19410 }
19411
19412 return UP(fcall);
19413 }
19414 }
19415
19416 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19417 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19418 }
19419
19420 return node;
19421 }
19422 case PM_TOKEN_HEREDOC_START: {
19423 // Here we have found a heredoc. We'll parse it and add it to the
19424 // list of strings.
19425 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
19426 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
19427
19428 size_t common_whitespace = (size_t) -1;
19429 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
19430
19431 parser_lex(parser);
19432 pm_token_t opening = parser->previous;
19433
19434 pm_node_t *node;
19435 pm_node_t *part;
19436
19437 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19438 // If we get here, then we have an empty heredoc. We'll create
19439 // an empty content token and return an empty string node.
19440 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19441 pm_token_t content = parse_strings_empty_content(parser->previous.start);
19442
19443 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19444 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19445 } else {
19446 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19447 }
19448
19449 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
19450 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
19451 // If we get here, then we tried to find something in the
19452 // heredoc but couldn't actually parse anything, so we'll just
19453 // return an error recovery node.
19454 //
19455 // parse_string_part handles its own errors, so there is no need
19456 // for us to add one here.
19457 node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19458 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19459 // If we get here, then the part that we parsed was plain string
19460 // content and we're at the end of the heredoc, so we can return
19461 // just a string node with the heredoc opening and closing as
19462 // its opening and closing.
19463 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19464 pm_string_node_t *cast = (pm_string_node_t *) part;
19465
19466 cast->opening_loc = TOK2LOC(parser, &opening);
19467 cast->closing_loc = TOK2LOC(parser, &parser->current);
19468 cast->base.location = cast->opening_loc;
19469
19470 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19471 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
19472 cast->base.type = PM_X_STRING_NODE;
19473 }
19474
19475 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19476 parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
19477 }
19478
19479 node = UP(cast);
19480 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19481 } else {
19482 // If we get here, then we have multiple parts in the heredoc,
19483 // so we'll need to create an interpolated string node to hold
19484 // them all.
19485 pm_node_list_t parts = { 0 };
19486 pm_node_list_append(parser->arena, &parts, part);
19487
19488 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19489 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19490 pm_node_list_append(parser->arena, &parts, part);
19491 }
19492 }
19493
19494 // Now that we have all of the parts, create the correct type of
19495 // interpolated node.
19496 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19497 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19498 cast->parts = parts;
19499
19500 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19501 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
19502
19503 cast->base.location = cast->opening_loc;
19504 node = UP(cast);
19505 } else {
19506 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
19507
19508 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19509 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
19510
19511 cast->base.location = cast->opening_loc;
19512 node = UP(cast);
19513 }
19514
19515 // If this is a heredoc that is indented with a ~, then we need
19516 // to dedent each line by the common leading whitespace.
19517 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19518 pm_node_list_t *nodes;
19519 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19520 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
19521 } else {
19522 nodes = &((pm_interpolated_string_node_t *) node)->parts;
19523 }
19524
19525 parse_heredoc_dedent(parser, nodes, common_whitespace);
19526 }
19527 }
19528
19529 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19530 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19531 }
19532
19533 return node;
19534 }
19535 case PM_TOKEN_INSTANCE_VARIABLE: {
19536 parser_lex(parser);
19537 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
19538
19539 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19540 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19541 }
19542
19543 return node;
19544 }
19545 case PM_TOKEN_INTEGER: {
19546 pm_node_flags_t base = parser->integer.base;
19547 parser_lex(parser);
19548 return UP(pm_integer_node_create(parser, base, &parser->previous));
19549 }
19550 case PM_TOKEN_INTEGER_IMAGINARY: {
19551 pm_node_flags_t base = parser->integer.base;
19552 parser_lex(parser);
19553 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
19554 }
19555 case PM_TOKEN_INTEGER_RATIONAL: {
19556 pm_node_flags_t base = parser->integer.base;
19557 parser_lex(parser);
19558 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
19559 }
19560 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
19561 pm_node_flags_t base = parser->integer.base;
19562 parser_lex(parser);
19563 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
19564 }
19565 case PM_TOKEN_KEYWORD___ENCODING__:
19566 parser_lex(parser);
19567 return UP(pm_source_encoding_node_create(parser, &parser->previous));
19568 case PM_TOKEN_KEYWORD___FILE__:
19569 parser_lex(parser);
19570 return UP(pm_source_file_node_create(parser, &parser->previous));
19571 case PM_TOKEN_KEYWORD___LINE__:
19572 parser_lex(parser);
19573 return UP(pm_source_line_node_create(parser, &parser->previous));
19574 case PM_TOKEN_KEYWORD_ALIAS: {
19575 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19576 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
19577 }
19578
19579 parser_lex(parser);
19580 pm_token_t keyword = parser->previous;
19581
19582 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
19583 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
19584
19585 switch (PM_NODE_TYPE(new_name)) {
19586 case PM_BACK_REFERENCE_READ_NODE:
19587 case PM_NUMBERED_REFERENCE_READ_NODE:
19588 case PM_GLOBAL_VARIABLE_READ_NODE: {
19589 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
19590 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
19591 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
19592 }
19593 } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19594 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19595 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19596 }
19597
19598 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
19599 }
19600 case PM_SYMBOL_NODE:
19601 case PM_INTERPOLATED_SYMBOL_NODE: {
19602 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19603 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19604 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19605 }
19606 }
19608 default:
19609 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
19610 }
19611 }
19612 case PM_TOKEN_KEYWORD_CASE:
19613 return parse_case(parser, flags, depth);
19614 case PM_TOKEN_KEYWORD_BEGIN: {
19615 size_t opening_newline_index = token_newline_index(parser);
19616 parser_lex(parser);
19617
19618 pm_token_t begin_keyword = parser->previous;
19619 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19620
19621 pm_node_list_t current_block_exits = { 0 };
19622 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19623 pm_statements_node_t *begin_statements = NULL;
19624
19625 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19626 pm_accepts_block_stack_push(parser, true);
19627 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19628 pm_accepts_block_stack_pop(parser);
19629 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19630 }
19631
19632 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19633 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19634 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
19635
19636 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
19637 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
19638 pop_block_exits(parser, previous_block_exits);
19639 return UP(begin_node);
19640 }
19641 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19642 pm_node_list_t current_block_exits = { 0 };
19643 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19644
19645 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19646 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19647 }
19648
19649 parser_lex(parser);
19650 pm_token_t keyword = parser->previous;
19651
19652 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19653 pm_token_t opening = parser->previous;
19654 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19655
19656 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
19657 pm_context_t context = parser->current_context->context;
19658 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19659 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19660 }
19661
19662 flush_block_exits(parser, previous_block_exits);
19663 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19664 }
19665 case PM_TOKEN_KEYWORD_BREAK:
19666 case PM_TOKEN_KEYWORD_NEXT:
19667 case PM_TOKEN_KEYWORD_RETURN: {
19668 parser_lex(parser);
19669
19670 pm_token_t keyword = parser->previous;
19671 pm_arguments_t arguments = { 0 };
19672
19673 if (
19674 token_begins_expression_p(parser->current.type) ||
19675 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19676 ) {
19677 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19678
19679 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19680 pm_token_t next = parser->current;
19681 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
19682
19683 // Reject `foo && return bar`.
19684 if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
19685 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
19686 }
19687 }
19688
19689 // It's possible that we've parsed a block argument through our
19690 // call to parse_arguments. If we found one, we should mark it
19691 // as invalid and destroy it, as we don't have a place for it.
19692 if (arguments.block != NULL) {
19693 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19694 pm_node_unreference(parser, arguments.block);
19695 arguments.block = NULL;
19696 }
19697 }
19698
19699 switch (keyword.type) {
19700 case PM_TOKEN_KEYWORD_BREAK: {
19701 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
19702 if (!parser->partial_script) parse_block_exit(parser, node);
19703 return node;
19704 }
19705 case PM_TOKEN_KEYWORD_NEXT: {
19706 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
19707 if (!parser->partial_script) parse_block_exit(parser, node);
19708 return node;
19709 }
19710 case PM_TOKEN_KEYWORD_RETURN: {
19711 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
19712 parse_return(parser, node);
19713 return node;
19714 }
19715 default:
19716 assert(false && "unreachable");
19717 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19718 }
19719 }
19720 case PM_TOKEN_KEYWORD_SUPER: {
19721 parser_lex(parser);
19722
19723 pm_token_t keyword = parser->previous;
19724 pm_arguments_t arguments = { 0 };
19725 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19726
19727 if (
19728 arguments.opening_loc.length == 0 &&
19729 arguments.arguments == NULL &&
19730 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19731 ) {
19732 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
19733 }
19734
19735 return UP(pm_super_node_create(parser, &keyword, &arguments));
19736 }
19737 case PM_TOKEN_KEYWORD_YIELD: {
19738 parser_lex(parser);
19739
19740 pm_token_t keyword = parser->previous;
19741 pm_arguments_t arguments = { 0 };
19742 parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
19743
19744 // It's possible that we've parsed a block argument through our
19745 // call to parse_arguments_list. If we found one, we should mark it
19746 // as invalid and destroy it, as we don't have a place for it on the
19747 // yield node.
19748 if (arguments.block != NULL) {
19749 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19750 pm_node_unreference(parser, arguments.block);
19751 arguments.block = NULL;
19752 }
19753
19754 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
19755 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19756
19757 return node;
19758 }
19759 case PM_TOKEN_KEYWORD_CLASS:
19760 return parse_class(parser, flags, depth);
19761 case PM_TOKEN_KEYWORD_DEF:
19762 return parse_def(parser, binding_power, flags, depth);
19763 case PM_TOKEN_KEYWORD_DEFINED: {
19764 parser_lex(parser);
19765
19766 pm_token_t keyword = parser->previous;
19767 pm_token_t lparen = { 0 };
19768 pm_token_t rparen = { 0 };
19769 pm_node_t *expression;
19770
19771 context_push(parser, PM_CONTEXT_DEFINED);
19772 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19773
19774 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19775 lparen = parser->previous;
19776
19777 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19778 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19779 lparen = (pm_token_t) { 0 };
19780 } else {
19781 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19782
19783 if (!parser->recovering) {
19784 accept1(parser, PM_TOKEN_NEWLINE);
19785 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19786 rparen = parser->previous;
19787 }
19788 }
19789 } else {
19790 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19791 }
19792
19793 context_pop(parser);
19794 return UP(pm_defined_node_create(
19795 parser,
19796 NTOK2PTR(lparen),
19797 expression,
19798 NTOK2PTR(rparen),
19799 &keyword
19800 ));
19801 }
19802 case PM_TOKEN_KEYWORD_END_UPCASE: {
19803 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19804 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19805 }
19806
19807 parser_lex(parser);
19808 pm_token_t keyword = parser->previous;
19809
19810 if (context_def_p(parser)) {
19811 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19812 }
19813
19814 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19815 pm_token_t opening = parser->previous;
19816 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19817
19818 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19819 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19820 }
19821 case PM_TOKEN_KEYWORD_FALSE:
19822 parser_lex(parser);
19823 return UP(pm_false_node_create(parser, &parser->previous));
19824 case PM_TOKEN_KEYWORD_FOR: {
19825 size_t opening_newline_index = token_newline_index(parser);
19826 parser_lex(parser);
19827
19828 pm_token_t for_keyword = parser->previous;
19829 pm_node_t *index;
19830
19831 context_push(parser, PM_CONTEXT_FOR_INDEX);
19832
19833 // First, parse out the first index expression.
19834 if (accept1(parser, PM_TOKEN_USTAR)) {
19835 pm_token_t star_operator = parser->previous;
19836 pm_node_t *name = NULL;
19837
19838 if (token_begins_expression_p(parser->current.type)) {
19839 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19840 }
19841
19842 index = UP(pm_splat_node_create(parser, &star_operator, name));
19843 } else if (token_begins_expression_p(parser->current.type)) {
19844 index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19845 } else {
19846 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19847 index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19848 }
19849
19850 // Now, if there are multiple index expressions, parse them out.
19851 if (match1(parser, PM_TOKEN_COMMA)) {
19852 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19853 } else {
19854 index = parse_target(parser, index, false, false);
19855 }
19856
19857 context_pop(parser);
19858 pm_do_loop_stack_push(parser, true);
19859
19860 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19861 pm_token_t in_keyword = parser->previous;
19862
19863 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19864 pm_do_loop_stack_pop(parser);
19865
19866 pm_token_t do_keyword = { 0 };
19867 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19868 do_keyword = parser->previous;
19869 } else {
19870 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19871 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
19872 }
19873 }
19874
19875 pm_statements_node_t *statements = NULL;
19876 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19877 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19878 }
19879
19880 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19881 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19882
19883 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19884 }
19885 case PM_TOKEN_KEYWORD_IF:
19886 if (parser_end_of_line_p(parser)) {
19887 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19888 }
19889
19890 size_t opening_newline_index = token_newline_index(parser);
19891 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19892 parser_lex(parser);
19893
19894 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19895 case PM_TOKEN_KEYWORD_UNDEF: {
19896 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19897 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19898 }
19899
19900 parser_lex(parser);
19901 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19902 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19903
19904 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19905 } else {
19906 pm_undef_node_append(parser->arena, undef, name);
19907
19908 while (match1(parser, PM_TOKEN_COMMA)) {
19909 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19910 parser_lex(parser);
19911 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19912
19913 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19914 break;
19915 }
19916
19917 pm_undef_node_append(parser->arena, undef, name);
19918 }
19919 }
19920
19921 return UP(undef);
19922 }
19923 case PM_TOKEN_KEYWORD_NOT: {
19924 parser_lex(parser);
19925
19926 pm_token_t message = parser->previous;
19927 pm_arguments_t arguments = { 0 };
19928 pm_node_t *receiver = NULL;
19929
19930 // The `not` keyword without parentheses is only valid in contexts
19931 // where it would be parsed as an expression (i.e., at or below
19932 // the `not` binding power level). In other contexts (e.g., method
19933 // arguments, array elements, assignment right-hand sides),
19934 // parentheses are required: `not(x)`. An exception is made for
19935 // endless def bodies, where `not` is valid as both `arg` and
19936 // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
19937 if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19938 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19939 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19940 } else {
19941 accept1(parser, PM_TOKEN_NEWLINE);
19942 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19943 }
19944
19945 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19946 }
19947
19948 accept1(parser, PM_TOKEN_NEWLINE);
19949
19950 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19951 pm_token_t lparen = parser->previous;
19952
19953 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19954 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19955 } else {
19956 arguments.opening_loc = TOK2LOC(parser, &lparen);
19957 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19958
19959 if (!parser->recovering) {
19960 accept1(parser, PM_TOKEN_NEWLINE);
19961 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19962 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19963 }
19964 }
19965 } else {
19966 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19967 }
19968
19969 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19970 }
19971 case PM_TOKEN_KEYWORD_UNLESS: {
19972 size_t opening_newline_index = token_newline_index(parser);
19973 parser_lex(parser);
19974
19975 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19976 }
19977 case PM_TOKEN_KEYWORD_MODULE:
19978 return parse_module(parser, flags, depth);
19979 case PM_TOKEN_KEYWORD_NIL:
19980 parser_lex(parser);
19981 return UP(pm_nil_node_create(parser, &parser->previous));
19982 case PM_TOKEN_KEYWORD_REDO: {
19983 parser_lex(parser);
19984
19985 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19986 if (!parser->partial_script) parse_block_exit(parser, node);
19987
19988 return node;
19989 }
19990 case PM_TOKEN_KEYWORD_RETRY: {
19991 parser_lex(parser);
19992
19993 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
19994 parse_retry(parser, node);
19995
19996 return node;
19997 }
19998 case PM_TOKEN_KEYWORD_SELF:
19999 parser_lex(parser);
20000 return UP(pm_self_node_create(parser, &parser->previous));
20001 case PM_TOKEN_KEYWORD_TRUE:
20002 parser_lex(parser);
20003 return UP(pm_true_node_create(parser, &parser->previous));
20004 case PM_TOKEN_KEYWORD_UNTIL: {
20005 size_t opening_newline_index = token_newline_index(parser);
20006
20007 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20008 pm_do_loop_stack_push(parser, true);
20009
20010 parser_lex(parser);
20011 pm_token_t keyword = parser->previous;
20012 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20013
20014 pm_do_loop_stack_pop(parser);
20015 context_pop(parser);
20016
20017 pm_token_t do_keyword = { 0 };
20018 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20019 do_keyword = parser->previous;
20020 } else {
20021 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20022 }
20023
20024 pm_statements_node_t *statements = NULL;
20025 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20026 pm_accepts_block_stack_push(parser, true);
20027 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20028 pm_accepts_block_stack_pop(parser);
20029 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20030 }
20031
20032 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20033 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
20034
20035 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20036 }
20037 case PM_TOKEN_KEYWORD_WHILE: {
20038 size_t opening_newline_index = token_newline_index(parser);
20039
20040 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20041 pm_do_loop_stack_push(parser, true);
20042
20043 parser_lex(parser);
20044 pm_token_t keyword = parser->previous;
20045 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20046
20047 pm_do_loop_stack_pop(parser);
20048 context_pop(parser);
20049
20050 pm_token_t do_keyword = { 0 };
20051 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20052 do_keyword = parser->previous;
20053 } else {
20054 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20055 }
20056
20057 pm_statements_node_t *statements = NULL;
20058 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20059 pm_accepts_block_stack_push(parser, true);
20060 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20061 pm_accepts_block_stack_pop(parser);
20062 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20063 }
20064
20065 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20066 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
20067
20068 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20069 }
20070 case PM_TOKEN_PERCENT_LOWER_I: {
20071 parser_lex(parser);
20072 pm_token_t opening = parser->previous;
20073 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20074 pm_node_t *current = NULL;
20075
20076 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20077 accept1(parser, PM_TOKEN_WORDS_SEP);
20078 if (match1(parser, PM_TOKEN_STRING_END)) break;
20079
20080 // Interpolation is not possible but nested heredocs can still lead to
20081 // consecutive (disjoint) string tokens when the final newline is escaped.
20082 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20083 // Record the string node, moving to interpolation if needed.
20084 if (current == NULL) {
20085 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
20086 parser_lex(parser);
20087 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20088 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20089 parser_lex(parser);
20090 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
20091 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20092 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20093 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
20094 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
20095 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
20096 parser_lex(parser);
20097
20098 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
20099 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
20100 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
20101
20102 // current is arena-allocated so no explicit free is needed.
20103 current = UP(interpolated);
20104 } else {
20105 assert(false && "unreachable");
20106 }
20107 }
20108
20109 if (current) {
20110 pm_array_node_elements_append(parser->arena, array, current);
20111 current = NULL;
20112 } else {
20113 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20114 }
20115 }
20116
20117 pm_token_t closing = parser->current;
20118 if (match1(parser, PM_TOKEN_EOF)) {
20119 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20120 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20121 } else {
20122 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20123 }
20124 pm_array_node_close_set(parser, array, &closing);
20125
20126 return UP(array);
20127 }
20128 case PM_TOKEN_PERCENT_UPPER_I:
20129 return parse_symbol_array(parser, depth);
20130 case PM_TOKEN_PERCENT_LOWER_W: {
20131 parser_lex(parser);
20132 pm_token_t opening = parser->previous;
20133 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20134 pm_node_t *current = NULL;
20135
20136 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20137 accept1(parser, PM_TOKEN_WORDS_SEP);
20138 if (match1(parser, PM_TOKEN_STRING_END)) break;
20139
20140 // Interpolation is not possible but nested heredocs can still lead to
20141 // consecutive (disjoint) string tokens when the final newline is escaped.
20142 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20143 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20144
20145 // Record the string node, moving to interpolation if needed.
20146 if (current == NULL) {
20147 current = string;
20148 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20149 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
20150 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20151 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
20152 pm_interpolated_string_node_append(parser, interpolated, current);
20153 pm_interpolated_string_node_append(parser, interpolated, string);
20154 current = UP(interpolated);
20155 } else {
20156 assert(false && "unreachable");
20157 }
20158 parser_lex(parser);
20159 }
20160
20161 if (current) {
20162 pm_array_node_elements_append(parser->arena, array, current);
20163 current = NULL;
20164 } else {
20165 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20166 }
20167 }
20168
20169 pm_token_t closing = parser->current;
20170 if (match1(parser, PM_TOKEN_EOF)) {
20171 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20172 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20173 } else {
20174 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20175 }
20176
20177 pm_array_node_close_set(parser, array, &closing);
20178 return UP(array);
20179 }
20180 case PM_TOKEN_PERCENT_UPPER_W:
20181 return parse_string_array(parser, depth);
20182 case PM_TOKEN_REGEXP_BEGIN: {
20183 pm_token_t opening = parser->current;
20184 parser_lex(parser);
20185
20186 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20187 // If we get here, then we have an end immediately after a start. In
20188 // that case we'll create an empty content token and return an
20189 // uninterpolated regular expression.
20190 pm_token_t content = (pm_token_t) {
20191 .type = PM_TOKEN_STRING_CONTENT,
20192 .start = parser->previous.end,
20193 .end = parser->previous.end
20194 };
20195
20196 parser_lex(parser);
20197
20198 pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20199 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20200 return UP(node);
20201 }
20202
20204
20205 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20206 // In this case we've hit string content so we know the regular
20207 // expression at least has something in it. We'll need to check if the
20208 // following token is the end (in which case we can return a plain
20209 // regular expression) or if it's not then it has interpolation.
20210 pm_string_t unescaped = parser->current_string;
20211 pm_token_t content = parser->current;
20212 parser_lex(parser);
20213
20214 // If we hit an end, then we can create a regular expression
20215 // node without interpolation, which can be represented more
20216 // succinctly and more easily compiled.
20217 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20218 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20219
20220 // If we're not immediately followed by a =~, then we
20221 // parse and validate now. If it is followed by a =~,
20222 // then it will get parsed in the =~ handler where
20223 // named captures can also be extracted.
20224 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20225 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20226 }
20227
20228 return UP(node);
20229 }
20230
20231 // If we get here, then we have interpolation so we'll need to create
20232 // a regular expression node with interpolation.
20233 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20234
20235 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20236 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20237 // This is extremely strange, but the first string part of a
20238 // regular expression will always be tagged as binary if we
20239 // are in a US-ASCII file, no matter its contents.
20240 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20241 }
20242
20243 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20244 } else {
20245 // If the first part of the body of the regular expression is not a
20246 // string content, then we have interpolation and we need to create an
20247 // interpolated regular expression node.
20248 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20249 }
20250
20251 // Now that we're here and we have interpolation, we'll parse all of the
20252 // parts into the list.
20253 pm_node_t *part;
20254 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20255 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20256 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20257 }
20258 }
20259
20260 pm_token_t closing = parser->current;
20261 if (match1(parser, PM_TOKEN_EOF)) {
20262 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20263 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20264 } else {
20265 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20266 }
20267
20268 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20269 return UP(interpolated);
20270 }
20271 case PM_TOKEN_BACKTICK:
20272 case PM_TOKEN_PERCENT_LOWER_X: {
20273 parser_lex(parser);
20274 pm_token_t opening = parser->previous;
20275
20276 // When we get here, we don't know if this string is going to have
20277 // interpolation or not, even though it is allowed. Still, we want to be
20278 // able to return a string node without interpolation if we can since
20279 // it'll be faster.
20280 if (match1(parser, PM_TOKEN_STRING_END)) {
20281 // If we get here, then we have an end immediately after a start. In
20282 // that case we'll create an empty content token and return an
20283 // uninterpolated string.
20284 pm_token_t content = (pm_token_t) {
20285 .type = PM_TOKEN_STRING_CONTENT,
20286 .start = parser->previous.end,
20287 .end = parser->previous.end
20288 };
20289
20290 parser_lex(parser);
20291 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
20292 }
20293
20295
20296 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20297 // In this case we've hit string content so we know the string
20298 // at least has something in it. We'll need to check if the
20299 // following token is the end (in which case we can return a
20300 // plain string) or if it's not then it has interpolation.
20301 pm_string_t unescaped = parser->current_string;
20302 pm_token_t content = parser->current;
20303 parser_lex(parser);
20304
20305 if (match1(parser, PM_TOKEN_STRING_END)) {
20306 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
20307 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20308 parser_lex(parser);
20309 return node;
20310 }
20311
20312 // If we get here, then we have interpolation so we'll need to
20313 // create a string node with interpolation.
20314 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20315
20316 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20317 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20318
20319 pm_interpolated_xstring_node_append(parser->arena, node, part);
20320 } else {
20321 // If the first part of the body of the string is not a string
20322 // content, then we have interpolation and we need to create an
20323 // interpolated string node.
20324 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20325 }
20326
20327 pm_node_t *part;
20328 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20329 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20330 pm_interpolated_xstring_node_append(parser->arena, node, part);
20331 }
20332 }
20333
20334 pm_token_t closing = parser->current;
20335 if (match1(parser, PM_TOKEN_EOF)) {
20336 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20337 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20338 } else {
20339 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20340 }
20341 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
20342
20343 return UP(node);
20344 }
20345 case PM_TOKEN_USTAR: {
20346 parser_lex(parser);
20347
20348 // * operators at the beginning of expressions are only valid in the
20349 // context of a multiple assignment. We enforce that here. We'll
20350 // still lex past it though and create a missing node place.
20351 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20352 pm_parser_err_prefix(parser, diag_id);
20353 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20354 }
20355
20356 pm_token_t operator = parser->previous;
20357 pm_node_t *name = NULL;
20358
20359 if (token_begins_expression_p(parser->current.type)) {
20360 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20361 }
20362
20363 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
20364
20365 if (match1(parser, PM_TOKEN_COMMA)) {
20366 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20367 } else {
20368 return parse_target_validate(parser, splat, true);
20369 }
20370 }
20371 case PM_TOKEN_BANG: {
20372 if (binding_power > PM_BINDING_POWER_UNARY) {
20373 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20374 }
20375
20376 parser_lex(parser);
20377
20378 pm_token_t operator = parser->previous;
20379 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20380 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20381
20382 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20383 return UP(node);
20384 }
20385 case PM_TOKEN_TILDE: {
20386 if (binding_power > PM_BINDING_POWER_UNARY) {
20387 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20388 }
20389 parser_lex(parser);
20390
20391 pm_token_t operator = parser->previous;
20392 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20393 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20394
20395 return UP(node);
20396 }
20397 case PM_TOKEN_UMINUS: {
20398 if (binding_power > PM_BINDING_POWER_UNARY) {
20399 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20400 }
20401 parser_lex(parser);
20402
20403 pm_token_t operator = parser->previous;
20404 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20405 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20406
20407 return UP(node);
20408 }
20409 case PM_TOKEN_UMINUS_NUM: {
20410 parser_lex(parser);
20411
20412 pm_token_t operator = parser->previous;
20413 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20414
20415 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20416 pm_token_t exponent_operator = parser->previous;
20417 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20418 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20419 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20420 } else {
20421 switch (PM_NODE_TYPE(node)) {
20422 case PM_INTEGER_NODE:
20423 case PM_FLOAT_NODE:
20424 case PM_RATIONAL_NODE:
20425 case PM_IMAGINARY_NODE:
20426 parse_negative_numeric(node);
20427 break;
20428 default:
20429 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20430 break;
20431 }
20432 }
20433
20434 return node;
20435 }
20436 case PM_TOKEN_MINUS_GREATER: {
20437 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20438 parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20439
20440 size_t opening_newline_index = token_newline_index(parser);
20441 pm_accepts_block_stack_push(parser, true);
20442 parser_lex(parser);
20443
20444 pm_token_t operator = parser->previous;
20445 pm_parser_scope_push(parser, false);
20446
20447 pm_block_parameters_node_t *block_parameters;
20448
20449 switch (parser->current.type) {
20450 case PM_TOKEN_PARENTHESIS_LEFT: {
20451 pm_token_t opening = parser->current;
20452 parser_lex(parser);
20453
20454 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20455 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20456 } else {
20457 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20458 }
20459
20460 accept1(parser, PM_TOKEN_NEWLINE);
20461 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20462
20463 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20464 break;
20465 }
20466 case PM_CASE_PARAMETER: {
20467 pm_accepts_block_stack_push(parser, false);
20468 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20469 pm_accepts_block_stack_pop(parser);
20470 break;
20471 }
20472 default: {
20473 block_parameters = NULL;
20474 break;
20475 }
20476 }
20477
20478 pm_token_t opening;
20479 pm_node_t *body = NULL;
20480 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20481
20482 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20483 opening = parser->previous;
20484
20485 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20486 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20487 }
20488
20489 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20490 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20491 } else {
20492 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20493 opening = parser->previous;
20494
20495 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20496 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20497 }
20498
20499 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20500 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20501 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20502 } else {
20503 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20504 }
20505
20506 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20507 }
20508
20509 pm_constant_id_list_t locals;
20510 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20511 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20512
20513 pm_parser_scope_pop(parser);
20514 pm_accepts_block_stack_pop(parser);
20515
20516 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20517 }
20518 case PM_TOKEN_UPLUS: {
20519 if (binding_power > PM_BINDING_POWER_UNARY) {
20520 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20521 }
20522 parser_lex(parser);
20523
20524 pm_token_t operator = parser->previous;
20525 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20526 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20527
20528 return UP(node);
20529 }
20530 case PM_TOKEN_STRING_BEGIN:
20531 return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
20532 case PM_TOKEN_SYMBOL_BEGIN: {
20533 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20534 parser_lex(parser);
20535
20536 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20537 }
20538 default: {
20539 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20540
20541 if (recoverable != PM_CONTEXT_NONE) {
20542 parser->recovering = true;
20543
20544 // If the given error is not the generic one, then we'll add it
20545 // here because it will provide more context in addition to the
20546 // recoverable error that we will also add.
20547 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20548 pm_parser_err_prefix(parser, diag_id);
20549 }
20550
20551 // If we get here, then we are assuming this token is closing a
20552 // parent context, so we'll indicate that to the user so that
20553 // they know how we behaved.
20554 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
20555 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20556 // We're going to make a special case here, because "cannot
20557 // parse expression" is pretty generic, and we know here that we
20558 // have an unexpected token.
20559 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
20560 } else {
20561 pm_parser_err_prefix(parser, diag_id);
20562 }
20563
20564 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20565 }
20566 }
20567}
20568
20578static pm_node_t *
20579parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20580 pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20581
20582 // Assignments whose value is a command call (e.g., a = b c) can only
20583 // be followed by modifiers (if/unless/while/until/rescue) and not by
20584 // operators with higher binding power. If we find one, emit an error
20585 // and skip the operator and its right-hand side.
20586 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20587 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20588 parser_lex(parser);
20589 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20590 }
20591
20592 // Contradicting binding powers, the right-hand-side value of the assignment
20593 // allows the `rescue` modifier.
20594 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20595 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20596
20597 pm_token_t rescue = parser->current;
20598 parser_lex(parser);
20599
20600 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20601 context_pop(parser);
20602
20603 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20604 }
20605
20606 return value;
20607}
20608
20613static void
20614parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20615 switch (PM_NODE_TYPE(node)) {
20616 case PM_BEGIN_NODE: {
20617 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20618 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20619 break;
20620 }
20621 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20623 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20624 break;
20625 }
20626 case PM_PARENTHESES_NODE: {
20627 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20628 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20629 break;
20630 }
20631 case PM_STATEMENTS_NODE: {
20632 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20633 const pm_node_t *statement;
20634
20635 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20636 parse_assignment_value_local(parser, statement);
20637 }
20638 break;
20639 }
20640 default:
20641 break;
20642 }
20643}
20644
20657static pm_node_t *
20658parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20659 bool permitted = true;
20660 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20661
20662 pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20663 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20664
20665 parse_assignment_value_local(parser, value);
20666 bool single_value = true;
20667
20668 // Block calls (command call + do block, e.g., `foo bar do end`) cannot
20669 // be followed by a comma to form a multi-value RHS because each element
20670 // of a multi-value assignment must be an `arg`, not a `block_call`.
20671 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20672 single_value = false;
20673
20674 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20675 pm_array_node_elements_append(parser->arena, array, value);
20676 value = UP(array);
20677
20678 while (accept1(parser, PM_TOKEN_COMMA)) {
20679 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20680
20681 pm_array_node_elements_append(parser->arena, array, element);
20682 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
20683
20684 parse_assignment_value_local(parser, element);
20685 }
20686 }
20687
20688 // Assignments whose value is a command call (e.g., a = b c) can only
20689 // be followed by modifiers (if/unless/while/until/rescue) and not by
20690 // operators with higher binding power. If we find one, emit an error
20691 // and skip the operator and its right-hand side.
20692 if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20693 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20694 parser_lex(parser);
20695 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20696 }
20697
20698 // Contradicting binding powers, the right-hand-side value of the assignment
20699 // allows the `rescue` modifier.
20700 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20701 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20702
20703 pm_token_t rescue = parser->current;
20704 parser_lex(parser);
20705
20706 bool accepts_command_call_inner = false;
20707
20708 // RHS can accept command call iff the value is a call with arguments
20709 // but without parenthesis.
20710 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20711 pm_call_node_t *call_node = (pm_call_node_t *) value;
20712 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20713 accepts_command_call_inner = true;
20714 }
20715 }
20716
20717 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20718 context_pop(parser);
20719
20720 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20721 }
20722
20723 return value;
20724}
20725
20733static void
20734parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20735 if (call_node->arguments != NULL) {
20736 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20737 pm_node_unreference(parser, UP(call_node->arguments));
20738 call_node->arguments = NULL;
20739 }
20740
20741 if (call_node->block != NULL) {
20742 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20743 pm_node_unreference(parser, UP(call_node->block));
20744 call_node->block = NULL;
20745 }
20746}
20747
20748static PRISM_INLINE const uint8_t *
20749pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20750 cursor++;
20751
20752 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20753 uint8_t value = escape_hexadecimal_digit(*cursor);
20754 cursor++;
20755
20756 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20757 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20758 cursor++;
20759 }
20760
20761 pm_buffer_append_byte(unescaped, value);
20762 } else {
20763 pm_buffer_append_string(unescaped, "\\x", 2);
20764 }
20765
20766 return cursor;
20767}
20768
20769static PRISM_INLINE const uint8_t *
20770pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20771 uint8_t value = (uint8_t) (*cursor - '0');
20772 cursor++;
20773
20774 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20775 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20776 cursor++;
20777
20778 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20779 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20780 cursor++;
20781 }
20782 }
20783
20784 pm_buffer_append_byte(unescaped, value);
20785 return cursor;
20786}
20787
20788static PRISM_INLINE const uint8_t *
20789pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20790 const uint8_t *start = cursor - 1;
20791 cursor++;
20792
20793 if (cursor >= end) {
20794 pm_buffer_append_string(unescaped, "\\u", 2);
20795 return cursor;
20796 }
20797
20798 if (*cursor != '{') {
20799 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20800 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20801
20802 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20803 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20804 }
20805
20806 return cursor + length;
20807 }
20808
20809 cursor++;
20810 for (;;) {
20811 while (cursor < end && *cursor == ' ') cursor++;
20812
20813 if (cursor >= end) break;
20814 if (*cursor == '}') {
20815 cursor++;
20816 break;
20817 }
20818
20819 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20820 if (length == 0) {
20821 break;
20822 }
20823 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20824
20825 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20826 cursor += length;
20827 }
20828
20829 return cursor;
20830}
20831
20832static void
20833pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20834 const uint8_t *end = source + length;
20835 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20836
20837 for (;;) {
20838 if (++cursor >= end) {
20839 pm_buffer_append_byte(unescaped, '\\');
20840 return;
20841 }
20842
20843 switch (*cursor) {
20844 case 'x':
20845 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20846 break;
20847 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20848 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20849 break;
20850 case 'u':
20851 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20852 break;
20853 default:
20854 pm_buffer_append_byte(unescaped, '\\');
20855 break;
20856 }
20857
20858 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20859 if (next_cursor == NULL) break;
20860
20861 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20862 cursor = next_cursor;
20863 }
20864
20865 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20866}
20867
20872static void
20873parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
20874 pm_call_node_t *call = callback_data->call;
20875 pm_constant_id_list_t *names = &callback_data->names;
20876
20877 const uint8_t *source = pm_string_source(capture);
20878 size_t length = pm_string_length(capture);
20879 pm_buffer_t unescaped = { 0 };
20880
20881 // First, we need to handle escapes within the name of the capture group.
20882 // This is because regular expressions have three different representations
20883 // in prism. The first is the plain source code. The second is the
20884 // representation that will be sent to the regular expression engine, which
20885 // is the value of the "unescaped" field. This is poorly named, because it
20886 // actually still contains escapes, just a subset of them that the regular
20887 // expression engine knows how to handle. The third representation is fully
20888 // unescaped, which is what we need.
20889 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20890 if (PRISM_UNLIKELY(cursor != NULL)) {
20891 pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
20892 source = (const uint8_t *) pm_buffer_value(&unescaped);
20893 length = pm_buffer_length(&unescaped);
20894 }
20895
20896 const uint8_t *start;
20897 const uint8_t *end;
20898 pm_constant_id_t name;
20899
20900 // If the name of the capture group isn't a valid identifier, we do
20901 // not add it to the local table.
20902 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20903 pm_buffer_cleanup(&unescaped);
20904 return;
20905 }
20906
20907 if (shared) {
20908 // If the unescaped string is a slice of the source, then we can
20909 // copy the names directly. The pointers will line up.
20910 start = source;
20911 end = source + length;
20912 name = pm_parser_constant_id_raw(parser, start, end);
20913 } else {
20914 // Otherwise, the name is a slice of the malloc-ed owned string,
20915 // in which case we need to copy it out into a new string.
20916 start = parser->start + PM_NODE_START(call->receiver);
20917 end = parser->start + PM_NODE_END(call->receiver);
20918
20919 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
20920 memcpy(memory, source, length);
20921 name = pm_parser_constant_id_owned(parser, memory, length);
20922 }
20923
20924 // Add this name to the list of constants if it is valid, not duplicated,
20925 // and not a keyword.
20926 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20927 pm_constant_id_list_append(parser->arena, names, name);
20928
20929 int depth;
20930 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20931 // If the local is not already a local but it is a keyword, then we
20932 // do not want to add a capture for this.
20933 if (pm_local_is_keyword((const char *) source, length)) {
20934 pm_buffer_cleanup(&unescaped);
20935 return;
20936 }
20937
20938 // If the identifier is not already a local, then we will add it to
20939 // the local table.
20940 pm_parser_local_add(parser, name, start, end, 0);
20941 }
20942
20943 // Here we lazily create the MatchWriteNode since we know we're
20944 // about to add a target.
20945 if (callback_data->match == NULL) {
20946 callback_data->match = pm_match_write_node_create(parser, call);
20947 }
20948
20949 // Next, create the local variable target and add it to the list of
20950 // targets for the match.
20951 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20952 pm_node_list_append(parser->arena, &callback_data->match->targets, target);
20953 }
20954
20955 pm_buffer_cleanup(&unescaped);
20956}
20957
20963static pm_node_t *
20964parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20965 pm_regexp_name_data_t callback_data = {
20966 .call = call,
20967 .match = NULL,
20968 .names = { 0 },
20969 };
20970
20971 pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
20972
20973 if (callback_data.match != NULL) {
20974 return UP(callback_data.match);
20975 } else {
20976 return UP(call);
20977 }
20978}
20979
20980static PRISM_INLINE pm_node_t *
20981parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
20982 pm_token_t token = parser->current;
20983
20984 switch (token.type) {
20985 case PM_TOKEN_EQUAL: {
20986 switch (PM_NODE_TYPE(node)) {
20987 case PM_CALL_NODE: {
20988 // If we have no arguments to the call node and we need this
20989 // to be a target then this is either a method call or a
20990 // local variable write. This _must_ happen before the value
20991 // is parsed because it could be referenced in the value.
20992 pm_call_node_t *call_node = (pm_call_node_t *) node;
20993 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20994 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
20995 }
20996 }
20998 case PM_CASE_WRITABLE: {
20999 // When we have `it = value`, we need to add `it` as a local
21000 // variable before parsing the value, in case the value
21001 // references the variable.
21002 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21003 pm_parser_local_add_location(parser, &node->location, 0);
21004 }
21005
21006 parser_lex(parser);
21007 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21008
21009 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21010 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21011 }
21012
21013 return parse_write(parser, node, &token, value);
21014 }
21015 case PM_SPLAT_NODE: {
21016 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21017 pm_multi_target_node_targets_append(parser, multi_target, node);
21018
21019 parser_lex(parser);
21020 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21021 return parse_write(parser, UP(multi_target), &token, value);
21022 }
21023 case PM_SOURCE_ENCODING_NODE:
21024 case PM_FALSE_NODE:
21025 case PM_SOURCE_FILE_NODE:
21026 case PM_SOURCE_LINE_NODE:
21027 case PM_NIL_NODE:
21028 case PM_SELF_NODE:
21029 case PM_TRUE_NODE: {
21030 // In these special cases, we have specific error messages
21031 // and we will replace them with local variable writes.
21032 parser_lex(parser);
21033 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21034 return parse_unwriteable_write(parser, node, &token, value);
21035 }
21036 default:
21037 // In this case we have an = sign, but we don't know what
21038 // it's for. We need to treat it as an error. We'll mark it
21039 // as an error and skip past it.
21040 parser_lex(parser);
21041 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21042 return node;
21043 }
21044 }
21045 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21046 switch (PM_NODE_TYPE(node)) {
21047 case PM_BACK_REFERENCE_READ_NODE:
21048 case PM_NUMBERED_REFERENCE_READ_NODE:
21049 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21051 case PM_GLOBAL_VARIABLE_READ_NODE: {
21052 parser_lex(parser);
21053
21054 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21055 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
21056
21057 return result;
21058 }
21059 case PM_CLASS_VARIABLE_READ_NODE: {
21060 parser_lex(parser);
21061
21062 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21063 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21064
21065 return result;
21066 }
21067 case PM_CONSTANT_PATH_NODE: {
21068 parser_lex(parser);
21069
21070 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21071 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21072
21073 return parse_shareable_constant_write(parser, write);
21074 }
21075 case PM_CONSTANT_READ_NODE: {
21076 parser_lex(parser);
21077
21078 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21079 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21080
21081 return parse_shareable_constant_write(parser, write);
21082 }
21083 case PM_INSTANCE_VARIABLE_READ_NODE: {
21084 parser_lex(parser);
21085
21086 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21087 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21088
21089 return result;
21090 }
21091 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21092 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21093 parser_lex(parser);
21094
21095 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21096 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
21097
21098 pm_node_unreference(parser, node);
21099 return result;
21100 }
21101 case PM_LOCAL_VARIABLE_READ_NODE: {
21102 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21103 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
21104 pm_node_unreference(parser, node);
21105 }
21106
21108 parser_lex(parser);
21109
21110 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21111 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21112
21113 return result;
21114 }
21115 case PM_CALL_NODE: {
21116 pm_call_node_t *cast = (pm_call_node_t *) node;
21117
21118 // If we have a vcall (a method with no arguments and no
21119 // receiver that could have been a local variable) then we
21120 // will transform it into a local variable write.
21121 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21122 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21123 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21124 parser_lex(parser);
21125
21126 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21127 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21128
21129 return result;
21130 }
21131
21132 // Move past the token here so that we have already added
21133 // the local variable by this point.
21134 parser_lex(parser);
21135
21136 // If there is no call operator and the message is "[]" then
21137 // this is an aref expression, and we can transform it into
21138 // an aset expression.
21139 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21140 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21141 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
21142 }
21143
21144 // If this node cannot be writable, then we have an error.
21145 if (pm_call_node_writable_p(parser, cast)) {
21146 parse_write_name(parser, &cast->name);
21147 } else {
21148 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21149 }
21150
21151 parse_call_operator_write(parser, cast, &token);
21152 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21153 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
21154 }
21155 case PM_MULTI_WRITE_NODE: {
21156 parser_lex(parser);
21157 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21158 return node;
21159 }
21160 default:
21161 parser_lex(parser);
21162
21163 // In this case we have an &&= sign, but we don't know what it's for.
21164 // We need to treat it as an error. For now, we'll mark it as an error
21165 // and just skip right past it.
21166 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21167 return node;
21168 }
21169 }
21170 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21171 switch (PM_NODE_TYPE(node)) {
21172 case PM_BACK_REFERENCE_READ_NODE:
21173 case PM_NUMBERED_REFERENCE_READ_NODE:
21174 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21176 case PM_GLOBAL_VARIABLE_READ_NODE: {
21177 parser_lex(parser);
21178
21179 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21180 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
21181
21182 return result;
21183 }
21184 case PM_CLASS_VARIABLE_READ_NODE: {
21185 parser_lex(parser);
21186
21187 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21188 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21189
21190 return result;
21191 }
21192 case PM_CONSTANT_PATH_NODE: {
21193 parser_lex(parser);
21194
21195 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21196 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21197
21198 return parse_shareable_constant_write(parser, write);
21199 }
21200 case PM_CONSTANT_READ_NODE: {
21201 parser_lex(parser);
21202
21203 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21204 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21205
21206 return parse_shareable_constant_write(parser, write);
21207 }
21208 case PM_INSTANCE_VARIABLE_READ_NODE: {
21209 parser_lex(parser);
21210
21211 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21212 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21213
21214 return result;
21215 }
21216 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21217 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21218 parser_lex(parser);
21219
21220 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21221 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
21222
21223 pm_node_unreference(parser, node);
21224 return result;
21225 }
21226 case PM_LOCAL_VARIABLE_READ_NODE: {
21227 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21228 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21229 pm_node_unreference(parser, node);
21230 }
21231
21233 parser_lex(parser);
21234
21235 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21236 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21237
21238 return result;
21239 }
21240 case PM_CALL_NODE: {
21241 pm_call_node_t *cast = (pm_call_node_t *) node;
21242
21243 // If we have a vcall (a method with no arguments and no
21244 // receiver that could have been a local variable) then we
21245 // will transform it into a local variable write.
21246 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21247 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21248 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21249 parser_lex(parser);
21250
21251 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21252 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21253
21254 return result;
21255 }
21256
21257 // Move past the token here so that we have already added
21258 // the local variable by this point.
21259 parser_lex(parser);
21260
21261 // If there is no call operator and the message is "[]" then
21262 // this is an aref expression, and we can transform it into
21263 // an aset expression.
21264 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21265 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21266 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
21267 }
21268
21269 // If this node cannot be writable, then we have an error.
21270 if (pm_call_node_writable_p(parser, cast)) {
21271 parse_write_name(parser, &cast->name);
21272 } else {
21273 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21274 }
21275
21276 parse_call_operator_write(parser, cast, &token);
21277 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21278 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
21279 }
21280 case PM_MULTI_WRITE_NODE: {
21281 parser_lex(parser);
21282 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21283 return node;
21284 }
21285 default:
21286 parser_lex(parser);
21287
21288 // In this case we have an ||= sign, but we don't know what it's for.
21289 // We need to treat it as an error. For now, we'll mark it as an error
21290 // and just skip right past it.
21291 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21292 return node;
21293 }
21294 }
21295 case PM_TOKEN_AMPERSAND_EQUAL:
21296 case PM_TOKEN_CARET_EQUAL:
21297 case PM_TOKEN_GREATER_GREATER_EQUAL:
21298 case PM_TOKEN_LESS_LESS_EQUAL:
21299 case PM_TOKEN_MINUS_EQUAL:
21300 case PM_TOKEN_PERCENT_EQUAL:
21301 case PM_TOKEN_PIPE_EQUAL:
21302 case PM_TOKEN_PLUS_EQUAL:
21303 case PM_TOKEN_SLASH_EQUAL:
21304 case PM_TOKEN_STAR_EQUAL:
21305 case PM_TOKEN_STAR_STAR_EQUAL: {
21306 switch (PM_NODE_TYPE(node)) {
21307 case PM_BACK_REFERENCE_READ_NODE:
21308 case PM_NUMBERED_REFERENCE_READ_NODE:
21309 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21311 case PM_GLOBAL_VARIABLE_READ_NODE: {
21312 parser_lex(parser);
21313
21314 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21315 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
21316
21317 return result;
21318 }
21319 case PM_CLASS_VARIABLE_READ_NODE: {
21320 parser_lex(parser);
21321
21322 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21323 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21324
21325 return result;
21326 }
21327 case PM_CONSTANT_PATH_NODE: {
21328 parser_lex(parser);
21329
21330 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21331 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21332
21333 return parse_shareable_constant_write(parser, write);
21334 }
21335 case PM_CONSTANT_READ_NODE: {
21336 parser_lex(parser);
21337
21338 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21339 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21340
21341 return parse_shareable_constant_write(parser, write);
21342 }
21343 case PM_INSTANCE_VARIABLE_READ_NODE: {
21344 parser_lex(parser);
21345
21346 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21347 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21348
21349 return result;
21350 }
21351 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21352 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21353 parser_lex(parser);
21354
21355 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21356 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21357
21358 pm_node_unreference(parser, node);
21359 return result;
21360 }
21361 case PM_LOCAL_VARIABLE_READ_NODE: {
21362 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21363 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21364 pm_node_unreference(parser, node);
21365 }
21366
21368 parser_lex(parser);
21369
21370 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21371 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21372
21373 return result;
21374 }
21375 case PM_CALL_NODE: {
21376 parser_lex(parser);
21377 pm_call_node_t *cast = (pm_call_node_t *) node;
21378
21379 // If we have a vcall (a method with no arguments and no
21380 // receiver that could have been a local variable) then we
21381 // will transform it into a local variable write.
21382 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21383 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21384 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21385 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21386 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21387
21388 return result;
21389 }
21390
21391 // If there is no call operator and the message is "[]" then
21392 // this is an aref expression, and we can transform it into
21393 // an aset expression.
21394 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21395 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21396 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21397 }
21398
21399 // If this node cannot be writable, then we have an error.
21400 if (pm_call_node_writable_p(parser, cast)) {
21401 parse_write_name(parser, &cast->name);
21402 } else {
21403 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21404 }
21405
21406 parse_call_operator_write(parser, cast, &token);
21407 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21408 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21409 }
21410 case PM_MULTI_WRITE_NODE: {
21411 parser_lex(parser);
21412 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21413 return node;
21414 }
21415 default:
21416 parser_lex(parser);
21417
21418 // In this case we have an operator but we don't know what it's for.
21419 // We need to treat it as an error. For now, we'll mark it as an error
21420 // and just skip right past it.
21421 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
21422 return node;
21423 }
21424 }
21425 case PM_TOKEN_AMPERSAND_AMPERSAND:
21426 case PM_TOKEN_KEYWORD_AND: {
21427 parser_lex(parser);
21428
21429 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21430 return UP(pm_and_node_create(parser, node, &token, right));
21431 }
21432 case PM_TOKEN_KEYWORD_OR:
21433 case PM_TOKEN_PIPE_PIPE: {
21434 parser_lex(parser);
21435
21436 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21437 return UP(pm_or_node_create(parser, node, &token, right));
21438 }
21439 case PM_TOKEN_EQUAL_TILDE: {
21440 // Note that we _must_ parse the value before adding the local
21441 // variables in order to properly mirror the behavior of Ruby. For
21442 // example,
21443 //
21444 // /(?<foo>bar)/ =~ foo
21445 //
21446 // In this case, `foo` should be a method call and not a local yet.
21447 parser_lex(parser);
21448 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21449
21450 // By default, we're going to create a call node and then return it.
21451 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21452 pm_node_t *result = UP(call);
21453
21454 // If the receiver of this =~ is a regular expression node, then we
21455 // need to introduce local variables for it based on its named
21456 // capture groups.
21457 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21458 // It's possible to have an interpolated regular expression node
21459 // that only contains strings. This is because it can be split
21460 // up by a heredoc. In this case we need to concat the unescaped
21461 // strings together and then parse them as a regular expression.
21463
21464 bool interpolated = false;
21465 size_t total_length = 0;
21466
21467 pm_node_t *part;
21468 PM_NODE_LIST_FOREACH(parts, index, part) {
21469 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21470 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21471 } else {
21472 interpolated = true;
21473 break;
21474 }
21475 }
21476
21477 if (!interpolated && total_length > 0) {
21478 void *memory = xmalloc(total_length);
21479 if (!memory) abort();
21480
21481 uint8_t *cursor = memory;
21482 PM_NODE_LIST_FOREACH(parts, index, part) {
21483 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21484 size_t length = pm_string_length(unescaped);
21485
21486 memcpy(cursor, pm_string_source(unescaped), length);
21487 cursor += length;
21488 }
21489
21490 pm_string_t owned;
21491 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21492
21493 result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21494 pm_string_cleanup(&owned);
21495 }
21496 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21497 // If we have a regular expression node, then we can parse
21498 // the named captures and validate encoding in one pass.
21500
21501 pm_regexp_name_data_t name_data = {
21502 .call = call,
21503 .match = NULL,
21504 .names = { 0 },
21505 };
21506
21507 pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
21508
21509 if (name_data.match != NULL) {
21510 result = UP(name_data.match);
21511 }
21512 }
21513
21514 return result;
21515 }
21516 case PM_TOKEN_UAMPERSAND:
21517 case PM_TOKEN_USTAR:
21518 case PM_TOKEN_USTAR_STAR:
21519 // The only times this will occur are when we are in an error state,
21520 // but we'll put them in here so that errors can propagate.
21521 case PM_TOKEN_BANG_EQUAL:
21522 case PM_TOKEN_BANG_TILDE:
21523 case PM_TOKEN_EQUAL_EQUAL:
21524 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21525 case PM_TOKEN_LESS_EQUAL_GREATER:
21526 case PM_TOKEN_CARET:
21527 case PM_TOKEN_PIPE:
21528 case PM_TOKEN_AMPERSAND:
21529 case PM_TOKEN_GREATER_GREATER:
21530 case PM_TOKEN_LESS_LESS:
21531 case PM_TOKEN_MINUS:
21532 case PM_TOKEN_PLUS:
21533 case PM_TOKEN_PERCENT:
21534 case PM_TOKEN_SLASH:
21535 case PM_TOKEN_STAR:
21536 case PM_TOKEN_STAR_STAR: {
21537 parser_lex(parser);
21538 pm_token_t operator = parser->previous;
21539 switch (PM_NODE_TYPE(node)) {
21540 case PM_RESCUE_MODIFIER_NODE: {
21542 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21543 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21544 }
21545 break;
21546 }
21547 case PM_AND_NODE: {
21548 pm_and_node_t *cast = (pm_and_node_t *) node;
21549 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21550 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21551 }
21552 break;
21553 }
21554 case PM_OR_NODE: {
21555 pm_or_node_t *cast = (pm_or_node_t *) node;
21556 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21557 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21558 }
21559 break;
21560 }
21561 default:
21562 break;
21563 }
21564
21565 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21566 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21567 }
21568 case PM_TOKEN_GREATER:
21569 case PM_TOKEN_GREATER_EQUAL:
21570 case PM_TOKEN_LESS:
21571 case PM_TOKEN_LESS_EQUAL: {
21572 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21573 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21574 }
21575
21576 parser_lex(parser);
21577 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21578 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21579 }
21580 case PM_TOKEN_AMPERSAND_DOT:
21581 case PM_TOKEN_DOT: {
21582 parser_lex(parser);
21583 pm_token_t operator = parser->previous;
21584 pm_arguments_t arguments = { 0 };
21585
21586 // This if statement handles the foo.() syntax.
21587 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21588 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21589 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21590 }
21591
21592 switch (PM_NODE_TYPE(node)) {
21593 case PM_RESCUE_MODIFIER_NODE: {
21595 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21596 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21597 }
21598 break;
21599 }
21600 case PM_AND_NODE: {
21601 pm_and_node_t *cast = (pm_and_node_t *) node;
21602 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21603 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21604 }
21605 break;
21606 }
21607 case PM_OR_NODE: {
21608 pm_or_node_t *cast = (pm_or_node_t *) node;
21609 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21610 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21611 }
21612 break;
21613 }
21614 default:
21615 break;
21616 }
21617
21618 pm_token_t message;
21619
21620 switch (parser->current.type) {
21621 case PM_CASE_OPERATOR:
21622 case PM_CASE_KEYWORD:
21623 case PM_TOKEN_CONSTANT:
21624 case PM_TOKEN_IDENTIFIER:
21625 case PM_TOKEN_METHOD_NAME: {
21626 parser_lex(parser);
21627 message = parser->previous;
21628 break;
21629 }
21630 default: {
21631 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
21632 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21633 }
21634 }
21635
21636 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21637 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21638
21639 if (
21640 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21641 arguments.arguments == NULL &&
21642 arguments.opening_loc.length == 0 &&
21643 match1(parser, PM_TOKEN_COMMA)
21644 ) {
21645 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21646 } else {
21647 return UP(call);
21648 }
21649 }
21650 case PM_TOKEN_DOT_DOT:
21651 case PM_TOKEN_DOT_DOT_DOT: {
21652 parser_lex(parser);
21653
21654 pm_node_t *right = NULL;
21655 if (token_begins_expression_p(parser->current.type)) {
21656 right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21657 }
21658
21659 return UP(pm_range_node_create(parser, node, &token, right));
21660 }
21661 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21662 pm_token_t keyword = parser->current;
21663 parser_lex(parser);
21664
21665 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21666 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21667 }
21668 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21669 pm_token_t keyword = parser->current;
21670 parser_lex(parser);
21671
21672 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21673 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21674 }
21675 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21676 parser_lex(parser);
21677 pm_statements_node_t *statements = pm_statements_node_create(parser);
21678 pm_statements_node_body_append(parser, statements, node, true);
21679
21680 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21681 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21682 }
21683 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21684 parser_lex(parser);
21685 pm_statements_node_t *statements = pm_statements_node_create(parser);
21686 pm_statements_node_body_append(parser, statements, node, true);
21687
21688 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21689 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21690 }
21691 case PM_TOKEN_QUESTION_MARK: {
21692 context_push(parser, PM_CONTEXT_TERNARY);
21693 pm_node_list_t current_block_exits = { 0 };
21694 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21695
21696 pm_token_t qmark = parser->current;
21697 parser_lex(parser);
21698
21699 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21700
21701 if (parser->recovering) {
21702 // If parsing the true expression of this ternary resulted in a syntax
21703 // error that we can recover from, then we're going to put missing nodes
21704 // and tokens into the remaining places. We want to be sure to do this
21705 // before the `expect` function call to make sure it doesn't
21706 // accidentally move past a ':' token that occurs after the syntax
21707 // error.
21708 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21709 pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21710
21711 context_pop(parser);
21712 pop_block_exits(parser, previous_block_exits);
21713 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21714 }
21715
21716 accept1(parser, PM_TOKEN_NEWLINE);
21717 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21718
21719 pm_token_t colon = parser->previous;
21720 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21721
21722 context_pop(parser);
21723 pop_block_exits(parser, previous_block_exits);
21724 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21725 }
21726 case PM_TOKEN_COLON_COLON: {
21727 parser_lex(parser);
21728 pm_token_t delimiter = parser->previous;
21729
21730 switch (parser->current.type) {
21731 case PM_TOKEN_CONSTANT: {
21732 parser_lex(parser);
21733 pm_node_t *path;
21734
21735 if (
21736 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21737 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21738 ) {
21739 // If we have a constant immediately following a '::' operator, then
21740 // this can either be a constant path or a method call, depending on
21741 // what follows the constant.
21742 //
21743 // If we have parentheses, then this is a method call. That would
21744 // look like Foo::Bar().
21745 pm_token_t message = parser->previous;
21746 pm_arguments_t arguments = { 0 };
21747
21748 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21749 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21750 } else {
21751 // Otherwise, this is a constant path. That would look like Foo::Bar.
21752 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21753 }
21754
21755 // If this is followed by a comma then it is a multiple assignment.
21756 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21757 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21758 }
21759
21760 return path;
21761 }
21762 case PM_CASE_OPERATOR:
21763 case PM_CASE_KEYWORD:
21764 case PM_TOKEN_IDENTIFIER:
21765 case PM_TOKEN_METHOD_NAME: {
21766 parser_lex(parser);
21767 pm_token_t message = parser->previous;
21768
21769 // If we have an identifier following a '::' operator, then it is for
21770 // sure a method call.
21771 pm_arguments_t arguments = { 0 };
21772 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21773 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21774
21775 // If this is followed by a comma then it is a multiple assignment.
21776 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21777 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21778 }
21779
21780 return UP(call);
21781 }
21782 case PM_TOKEN_PARENTHESIS_LEFT: {
21783 // If we have a parenthesis following a '::' operator, then it is the
21784 // method call shorthand. That would look like Foo::(bar).
21785 pm_arguments_t arguments = { 0 };
21786 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21787
21788 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21789 }
21790 default: {
21791 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21792 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21793 }
21794 }
21795 }
21796 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21797 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21798 parser_lex(parser);
21799 accept1(parser, PM_TOKEN_NEWLINE);
21800
21801 pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21802 context_pop(parser);
21803
21804 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21805 }
21806 case PM_TOKEN_BRACKET_LEFT: {
21807 parser_lex(parser);
21808
21809 pm_arguments_t arguments = { 0 };
21810 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21811
21812 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21813 pm_accepts_block_stack_push(parser, true);
21814 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
21815 pm_accepts_block_stack_pop(parser);
21816 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21817 }
21818
21819 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21820
21821 // If we have a comma after the closing bracket then this is a multiple
21822 // assignment and we should parse the targets.
21823 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21824 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21825 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21826 }
21827
21828 // If we're at the end of the arguments, we can now check if there is a
21829 // block node that starts with a {. If there is, then we can parse it and
21830 // add it to the arguments.
21831 pm_block_node_t *block = NULL;
21832 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21833 block = parse_block(parser, (uint16_t) (depth + 1));
21834 pm_arguments_validate_block(parser, &arguments, block);
21835 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21836 block = parse_block(parser, (uint16_t) (depth + 1));
21837 }
21838
21839 if (block != NULL) {
21840 if (arguments.block != NULL) {
21841 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21842 if (arguments.arguments == NULL) {
21843 arguments.arguments = pm_arguments_node_create(parser);
21844 }
21845 pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
21846 }
21847
21848 arguments.block = UP(block);
21849 }
21850
21851 return UP(pm_call_node_aref_create(parser, node, &arguments));
21852 }
21853 case PM_TOKEN_KEYWORD_IN: {
21854 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21855 parser->pattern_matching_newlines = true;
21856
21857 pm_token_t operator = parser->current;
21858 parser->command_start = false;
21859 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21860 parser_lex(parser);
21861
21862 pm_constant_id_list_t captures = { 0 };
21863 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21864
21865 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21866
21867 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21868 }
21869 case PM_TOKEN_EQUAL_GREATER: {
21870 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21871 parser->pattern_matching_newlines = true;
21872
21873 pm_token_t operator = parser->current;
21874 parser->command_start = false;
21875 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21876 parser_lex(parser);
21877
21878 pm_constant_id_list_t captures = { 0 };
21879 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21880
21881 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21882
21883 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21884 }
21885 default:
21886 assert(false && "unreachable");
21887 return NULL;
21888 }
21889}
21890
21891#undef PM_PARSE_PATTERN_SINGLE
21892#undef PM_PARSE_PATTERN_TOP
21893#undef PM_PARSE_PATTERN_MULTI
21894
21907static bool
21908parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
21909 pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
21910
21911 switch (PM_NODE_TYPE(node)) {
21912 case PM_MULTI_WRITE_NODE:
21913 case PM_RETURN_NODE:
21914 case PM_BREAK_NODE:
21915 case PM_NEXT_NODE:
21916 return left > PM_BINDING_POWER_MODIFIER;
21917 case PM_CLASS_VARIABLE_WRITE_NODE:
21918 case PM_CONSTANT_PATH_WRITE_NODE:
21919 case PM_CONSTANT_WRITE_NODE:
21920 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21921 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21922 case PM_LOCAL_VARIABLE_WRITE_NODE:
21923 return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
21924 case PM_CALL_NODE: {
21925 // Calls with an implicit array on the right-hand side are
21926 // statements and can only be followed by modifiers.
21927 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
21928 return left > PM_BINDING_POWER_MODIFIER;
21929 }
21930
21931 // Command-style calls (including block commands like
21932 // `foo bar do end`) can only be followed by composition
21933 // (and/or) and modifier (if/unless/etc.) operators.
21934 if (pm_command_call_value_p(node)) {
21935 return left > PM_BINDING_POWER_COMPOSITION;
21936 }
21937
21938 // A block call (command with do-block, or any call chained
21939 // from one) can only be followed by call chaining (., ::,
21940 // &.), composition (and/or), and modifier operators.
21941 if (pm_block_call_p(node)) {
21942 return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
21943 }
21944
21945 return false;
21946 }
21947 case PM_SUPER_NODE:
21948 case PM_YIELD_NODE:
21949 // Command-style super/yield (without parens) can only be followed
21950 // by composition and modifier operators.
21951 if (pm_command_call_value_p(node)) {
21952 return left > PM_BINDING_POWER_COMPOSITION;
21953 }
21954 return false;
21955 case PM_DEF_NODE:
21956 // An endless method whose body is a command-style call (e.g.,
21957 // `def f = foo bar`) is a command assignment and can only be
21958 // followed by modifiers.
21959 return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
21960 case PM_RESCUE_MODIFIER_NODE:
21961 // A rescue modifier whose handler is a pattern match (=> or in)
21962 // produces a statement and cannot be followed by operators above
21963 // the modifier level.
21964 if (left > PM_BINDING_POWER_MODIFIER) {
21966 pm_node_t *rescue_expression = cast->rescue_expression;
21967 return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
21968 }
21969 return false;
21970 default:
21971 return false;
21972 }
21973}
21974
21983static pm_node_t *
21984parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
21985 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21986 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21987 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21988 }
21989
21990 pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
21991
21992 // Some prefix nodes are statements and can only be followed by modifiers
21993 // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
21994 // here before entering the infix loop.
21995 switch (PM_NODE_TYPE(node)) {
21996 case PM_ERROR_RECOVERY_NODE:
21997 return node;
21998 case PM_PRE_EXECUTION_NODE:
21999 return node;
22000 case PM_POST_EXECUTION_NODE:
22001 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22002 case PM_ALIAS_METHOD_NODE:
22003 case PM_UNDEF_NODE:
22004 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22005 return node;
22006 }
22007 break;
22008 case PM_CALL_NODE:
22009 case PM_SUPER_NODE:
22010 case PM_YIELD_NODE:
22011 case PM_DEF_NODE:
22012 if (parse_expression_terminator(parser, node)) {
22013 return node;
22014 }
22015 break;
22016 case PM_SYMBOL_NODE:
22017 if (pm_symbol_node_label_p(parser, node)) {
22018 return node;
22019 }
22020 break;
22021 default:
22022 break;
22023 }
22024
22025 // Look and see if the next token can be parsed as an infix operator. If it
22026 // can, then we'll parse it using parse_expression_infix.
22027 pm_binding_powers_t current_binding_powers;
22028 pm_token_type_t current_token_type;
22029
22030 while (
22031 current_token_type = parser->current.type,
22032 current_binding_powers = pm_binding_powers[current_token_type],
22033 binding_power <= current_binding_powers.left &&
22034 current_binding_powers.binary
22035 ) {
22036 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
22037 if (parse_expression_terminator(parser, node)) return node;
22038
22039 // If the operator is nonassoc and we should not be able to parse the
22040 // upcoming infix operator, break.
22041 if (current_binding_powers.nonassoc) {
22042 // If this is a non-assoc operator and we are about to parse the
22043 // exact same operator, then we need to add an error.
22044 if (match1(parser, current_token_type)) {
22045 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22046 break;
22047 }
22048
22049 // If this is an endless range, then we need to reject a couple of
22050 // additional operators because it violates the normal operator
22051 // precedence rules. Those patterns are:
22052 //
22053 // 1.. & 2
22054 // 1.. * 2
22055 //
22056 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22057 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22058 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22059 break;
22060 }
22061
22062 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22063 break;
22064 }
22065 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22066 break;
22067 }
22068 }
22069
22070 if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
22071 // A command-style method call is only accepted on method chains.
22072 // Thus, we check whether the parsed node can continue method chains.
22073 // The method chain can continue if the parsed node is one of the following five kinds:
22074 // (1) index access: foo[1]
22075 // (2) attribute access: foo.bar
22076 // (3) method call with parenthesis: foo.bar(1)
22077 // (4) method call with a block: foo.bar do end
22078 // (5) constant path: foo::Bar
22079 switch (node->type) {
22080 case PM_CALL_NODE: {
22081 pm_call_node_t *cast = (pm_call_node_t *)node;
22082 if (
22083 // (1) foo[1]
22084 !(
22085 cast->call_operator_loc.length == 0 &&
22086 cast->message_loc.length > 0 &&
22087 parser->start[cast->message_loc.start] == '[' &&
22088 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
22089 ) &&
22090 // (2) foo.bar
22091 !(
22092 cast->call_operator_loc.length > 0 &&
22093 cast->arguments == NULL &&
22094 cast->block == NULL &&
22095 cast->opening_loc.length == 0
22096 ) &&
22097 // (3) foo.bar(1)
22098 !(
22099 cast->call_operator_loc.length > 0 &&
22100 cast->opening_loc.length > 0
22101 ) &&
22102 // (4) foo.bar do end
22103 !(
22104 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22105 )
22106 ) {
22107 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22108 }
22109 break;
22110 }
22111 // (5) foo::Bar
22112 case PM_CONSTANT_PATH_NODE:
22113 break;
22114 default:
22115 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22116 break;
22117 }
22118 }
22119
22120 if (context_terminator(parser->current_context->context, &parser->current)) {
22121 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
22122 if (
22123 !next_binding_powers.binary ||
22124 binding_power > next_binding_powers.left ||
22125 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
22126 ) {
22127 return node;
22128 }
22129 }
22130 }
22131
22132 return node;
22133}
22134
22139static pm_statements_node_t *
22140wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22141 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22142 if (statements == NULL) {
22143 statements = pm_statements_node_create(parser);
22144 }
22145
22146 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22147 pm_arguments_node_arguments_append(
22148 parser->arena,
22149 arguments,
22150 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
22151 );
22152
22153 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
22154 parser,
22155 arguments,
22156 pm_parser_constant_id_constant(parser, "print", 5)
22157 )), true);
22158 }
22159
22160 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22161 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22162 if (statements == NULL) {
22163 statements = pm_statements_node_create(parser);
22164 }
22165
22166 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22167 pm_arguments_node_arguments_append(
22168 parser->arena,
22169 arguments,
22170 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
22171 );
22172
22173 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22174 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
22175
22176 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22177 parser,
22178 pm_parser_constant_id_constant(parser, "$F", 2),
22179 UP(call)
22180 );
22181
22182 pm_statements_node_body_prepend(parser->arena, statements, UP(write));
22183 }
22184
22185 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22186 pm_arguments_node_arguments_append(
22187 parser->arena,
22188 arguments,
22189 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
22190 );
22191
22192 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22193 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22194 pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
22195 parser,
22196 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
22197 NULL,
22198 UP(pm_true_node_synthesized_create(parser))
22199 )));
22200
22201 pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
22202 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22203 }
22204
22205 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22206 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
22207 parser,
22208 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
22209 statements
22210 )), true);
22211
22212 statements = wrapped_statements;
22213 }
22214
22215 return statements;
22216}
22217
22221static pm_node_t *
22222parse_program(pm_parser_t *parser) {
22223 // If the current scope is NULL, then we want to push a new top level scope.
22224 // The current scope could exist in the event that we are parsing an eval
22225 // and the user has passed into scopes that already exist.
22226 if (parser->current_scope == NULL) {
22227 pm_parser_scope_push(parser, true);
22228 }
22229
22230 pm_node_list_t current_block_exits = { 0 };
22231 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22232
22233 parser_lex(parser);
22234 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22235
22236 if (statements != NULL && !parser->parsing_eval) {
22237 // If we have statements, then the top-level statement should be
22238 // explicitly checked as well. We have to do this here because
22239 // everywhere else we check all but the last statement.
22240 assert(statements->body.size > 0);
22241 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22242 }
22243
22244 pm_constant_id_list_t locals;
22245 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22246 pm_parser_scope_pop(parser);
22247
22248 // At the top level, see if we need to wrap the statements in a program
22249 // node with a while loop based on the options.
22250 if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
22251 statements = wrap_statements(parser, statements);
22252 } else {
22253 flush_block_exits(parser, previous_block_exits);
22254 }
22255
22256 // If this is an empty file, then we're still going to parse all of the
22257 // statements in order to gather up all of the comments and such. Here we'll
22258 // correct the location information.
22259 if (statements == NULL) {
22260 statements = pm_statements_node_create(parser);
22261 statements->base.location = (pm_location_t) { 0 };
22262 }
22263
22264 return UP(pm_program_node_create(parser, &locals, statements));
22265}
22266
22267/******************************************************************************/
22268/* External functions */
22269/******************************************************************************/
22270
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`.
 *
 * Unlike strstr, `big` does not need to be NUL-terminated: at most
 * `big_length` bytes of it are ever read, and a NUL byte inside `big` does
 * not stop the search.
 *
 * @param big The buffer to search in.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the first match inside `big`, `big` itself if `little`
 *     is empty, or NULL if there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very start. Returning here also avoids
    // reading the byte one past the end of the searchable span.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Bailing out here
    // keeps the end pointer computed below inside the buffer; forming a
    // pointer before the start of `big` would be undefined behavior.
    if (little_length > big_length) return NULL;

    // The last position at which a full match could still start.
    const char *max = big + (big_length - little_length);

    for (; big <= max; big++) {
        // Compare the first byte before calling memcmp as a cheap filter.
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22290
22291#ifdef _WIN32
22292#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22293#else
22299static void
22300pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22301 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22302 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
22303 }
22304}
22305#endif
22306
22311static void
22312pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22313 const char *switches = pm_strnstr(engine, " -", length);
22314 if (switches == NULL) return;
22315
22316 pm_options_t next_options = *options;
22317 options->shebang_callback(
22318 &next_options,
22319 (const uint8_t *) (switches + 1),
22320 length - ((size_t) (switches - engine)) - 1,
22321 options->shebang_callback_data
22322 );
22323
22324 size_t encoding_length;
22325 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22326 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22327 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22328 }
22329
22330 parser->command_line = next_options.command_line;
22331 parser->frozen_string_literal = next_options.frozen_string_literal;
22332}
22333
22337void
22338pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22339 assert(arena != NULL);
22340 assert(source != NULL);
22341
22342 *parser = (pm_parser_t) {
22343 .arena = arena,
22344 .metadata_arena = { 0 },
22345 .node_id = 0,
22346 .lex_state = PM_LEX_STATE_BEG,
22347 .enclosure_nesting = 0,
22348 .lambda_enclosure_nesting = -1,
22349 .brace_nesting = 0,
22350 .do_loop_stack = 0,
22351 .accepts_block_stack = 0,
22352 .lex_modes = {
22353 .index = 0,
22354 .stack = {{ .mode = PM_LEX_DEFAULT }},
22355 .current = &parser->lex_modes.stack[0],
22356 },
22357 .start = source,
22358 .end = source + size,
22359 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22360 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22361 .next_start = NULL,
22362 .heredoc_end = NULL,
22363 .data_loc = { 0 },
22364 .comment_list = { 0 },
22365 .magic_comment_list = { 0 },
22366 .warning_list = { 0 },
22367 .error_list = { 0 },
22368 .current_scope = NULL,
22369 .current_context = NULL,
22370 .encoding = PM_ENCODING_UTF_8_ENTRY,
22371 .encoding_changed_callback = NULL,
22372 .encoding_comment_start = source,
22373 .lex_callback = { 0 },
22374 .filepath = { 0 },
22375 .constant_pool = { 0 },
22376 .line_offsets = { 0 },
22377 .integer = { 0 },
22378 .current_string = PM_STRING_EMPTY,
22379 .start_line = 1,
22380 .explicit_encoding = NULL,
22381 .command_line = 0,
22382 .parsing_eval = false,
22383 .partial_script = false,
22384 .command_start = true,
22385 .recovering = false,
22386 .continuable = true,
22387 .encoding_locked = false,
22388 .encoding_changed = false,
22389 .pattern_matching_newlines = false,
22390 .in_keyword_arg = false,
22391 .current_block_exits = NULL,
22392 .semantic_token_seen = false,
22393 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22394 .warn_mismatched_indentation = true
22395 };
22396
22397 /* Pre-size the arenas based on input size to reduce the number of block
22398 * allocations (and the kernel page zeroing they trigger). The ratios were
22399 * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
22400 * The reserve call is a no-op when the capacity is at or below the default
22401 * arena block size, so small inputs don't waste an extra allocation. */
22402 if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
22403 if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
22404
22405 /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
22406 * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
22407 * We use 120 as a balance between over-allocation waste and resize
22408 * frequency. Resizes are cheap with arena allocation, so we lean toward
22409 * under-estimating. */
22410 uint32_t constant_size = ((uint32_t) size) / 120;
22411 pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22412
22413 /* Initialize the line offset list. Similar to the constant pool, we are
22414 * going to estimate the number of newlines that we will need based on the
22415 * size of the input. */
22416 size_t newline_size = size / 22;
22417 pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
22418
22419 // If options were provided to this parse, establish them here.
22420 if (options != NULL) {
22421 // filepath option
22422 parser->filepath = options->filepath;
22423
22424 // line option
22425 parser->start_line = options->line;
22426
22427 // encoding option
22428 size_t encoding_length = pm_string_length(&options->encoding);
22429 if (encoding_length > 0) {
22430 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22431 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22432 }
22433
22434 // encoding_locked option
22435 parser->encoding_locked = options->encoding_locked;
22436
22437 // frozen_string_literal option
22438 parser->frozen_string_literal = options->frozen_string_literal;
22439
22440 // command_line option
22441 parser->command_line = options->command_line;
22442
22443 // version option
22444 parser->version = options->version;
22445
22446 // partial_script
22447 parser->partial_script = options->partial_script;
22448
22449 // scopes option
22450 parser->parsing_eval = options->scopes_count > 0;
22451 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22452
22453 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22454 const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
22455 pm_parser_scope_push(parser, scope_index == 0);
22456
22457 // Scopes given from the outside are not allowed to have numbered
22458 // parameters.
22459 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22460
22461 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22462 const pm_string_t *local = pm_options_scope_local(scope, local_index);
22463
22464 const uint8_t *source = pm_string_source(local);
22465 size_t length = pm_string_length(local);
22466
22467 uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
22468 memcpy(allocated, source, length);
22469 pm_parser_local_add_owned(parser, allocated, length);
22470 }
22471 }
22472 }
22473
22474 // Now that we have established the user-provided options, check if
22475 // a version was given and parse as the latest version otherwise.
22476 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22477 parser->version = PM_OPTIONS_VERSION_LATEST;
22478 }
22479
22480 pm_accepts_block_stack_push(parser, true);
22481
22482 // Skip past the UTF-8 BOM if it exists.
22483 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22484 parser->current.end += 3;
22485 parser->encoding_comment_start += 3;
22486
22487 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22488 parser->encoding = PM_ENCODING_UTF_8_ENTRY;
22489 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22490 }
22491 }
22492
22493 // If the -x command line flag is set, or the first shebang of the file does
22494 // not include "ruby", then we'll search for a shebang that does include
22495 // "ruby" and start parsing from there.
22496 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22497
22498 // If the first two bytes of the source are a shebang, then we will do a bit
22499 // of extra processing.
22500 //
22501 // First, we'll indicate that the encoding comment is at the end of the
22502 // shebang. This means that when a shebang is present the encoding comment
22503 // can begin on the second line.
22504 //
22505 // Second, we will check if the shebang includes "ruby". If it does, then we
22506 // will start parsing from there. We will also potentially warn the
22507 // user if there is a carriage return at the end of the shebang. We will
22508 // also potentially call the shebang callback if this is the main script to
22509 // allow the caller to parse the shebang and find any command-line options.
22510 // If the shebang does not include "ruby" and this is the main script being
22511 // parsed, then we will start searching the file for a shebang that does
22512 // contain "ruby" as if -x were passed on the command line.
22513 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22514 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22515
22516 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22517 const char *engine;
22518
22519 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22520 if (newline != NULL) {
22521 parser->encoding_comment_start = newline + 1;
22522
22523 if (options == NULL || options->main_script) {
22524 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22525 }
22526 }
22527
22528 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22529 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22530 }
22531
22532 search_shebang = false;
22533 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22534 search_shebang = true;
22535 }
22536 }
22537
22538 // Here we're going to find the first shebang that includes "ruby" and start
22539 // parsing from there.
22540 if (search_shebang) {
22541 // If a shebang that includes "ruby" is not found, then we're going to add
22542 // a load error to the list of errors on the parser.
22543 bool found_shebang = false;
22544
22545 // This is going to point to the start of each line as we check it.
22546 // We'll maintain a moving window looking at each line as they come.
22547 const uint8_t *cursor = parser->start;
22548
22549 // The newline pointer points to the end of the current line that we're
22550 // considering. If it is NULL, then we're at the end of the file.
22551 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22552
22553 while (newline != NULL) {
22554 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
22555
22556 cursor = newline + 1;
22557 newline = next_newline(cursor, parser->end - cursor);
22558
22559 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22560 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22561 const char *engine;
22562 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22563 found_shebang = true;
22564
22565 if (newline != NULL) {
22566 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22567 parser->encoding_comment_start = newline + 1;
22568 }
22569
22570 if (options != NULL && options->shebang_callback != NULL) {
22571 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22572 }
22573
22574 break;
22575 }
22576 }
22577 }
22578
22579 if (found_shebang) {
22580 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22581 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22582 } else {
22583 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22584 pm_line_offset_list_clear(&parser->line_offsets);
22585 }
22586 }
22587
22588 // The encoding comment can start after any amount of inline whitespace, so
22589 // here we'll advance it to the first non-inline-whitespace character so
22590 // that it is ready for future comparisons.
22591 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22592}
22593
22602pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
22603 pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
22604 if (parser == NULL) abort();
22605
22606 pm_parser_init(arena, parser, source, size, options);
22607 return parser;
22608}
22609
22613void
22614pm_parser_cleanup(pm_parser_t *parser) {
22615 pm_string_cleanup(&parser->filepath);
22616 pm_arena_cleanup(&parser->metadata_arena);
22617
22618 while (parser->current_scope != NULL) {
22619 // Normally, popping the scope doesn't free the locals since it is
22620 // assumed that ownership has transferred to the AST. However if we have
22621 // scopes while we're freeing the parser, it's likely they came from
22622 // eval scopes and we need to free them explicitly here.
22623 pm_parser_scope_pop(parser);
22624 }
22625
22626 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22627 lex_mode_pop(parser);
22628 }
22629}
22630
22634void
22636 pm_parser_cleanup(parser);
22637 xfree_sized(parser, sizeof(pm_parser_t));
22638}
22639
22645static bool
22646pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
22647 switch (diag_id) {
22648 case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
22649 case PM_ERR_BEGIN_UPCASE_BRACE:
22650 case PM_ERR_CLASS_VARIABLE_BARE:
22651 case PM_ERR_END_UPCASE_BRACE:
22652 case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
22653 case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
22654 case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
22655 case PM_ERR_EXPRESSION_NOT_WRITABLE:
22656 case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
22657 case PM_ERR_FLOAT_PARSE:
22658 case PM_ERR_GLOBAL_VARIABLE_BARE:
22659 case PM_ERR_HASH_KEY:
22660 case PM_ERR_HEREDOC_IDENTIFIER:
22661 case PM_ERR_INSTANCE_VARIABLE_BARE:
22662 case PM_ERR_INVALID_BLOCK_EXIT:
22663 case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
22664 case PM_ERR_INVALID_FLOAT_EXPONENT:
22665 case PM_ERR_INVALID_NUMBER_BINARY:
22666 case PM_ERR_INVALID_NUMBER_DECIMAL:
22667 case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
22668 case PM_ERR_INVALID_NUMBER_OCTAL:
22669 case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
22670 case PM_ERR_NO_LOCAL_VARIABLE:
22671 case PM_ERR_PARAMETER_ORDER:
22672 case PM_ERR_STATEMENT_UNDEF:
22673 case PM_ERR_VOID_EXPRESSION:
22674 return true;
22675 default:
22676 return false;
22677 }
22678}
22679
22713static void
22714pm_parse_continuable(pm_parser_t *parser) {
22715 // If there are no errors then there is nothing to continue.
22716 if (parser->error_list.size == 0) {
22717 parser->continuable = false;
22718 return;
22719 }
22720
22721 if (!parser->continuable) return;
22722
22723 size_t source_length = (size_t) (parser->end - parser->start);
22724
22725 // First pass: check if there are any non-stray, non-fatal errors.
22726 bool has_non_stray_error = false;
22727 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22728 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
22729 has_non_stray_error = true;
22730 break;
22731 }
22732 }
22733
22734 // Second pass: check each error. We track the minimum source position
22735 // among non-stray, non-fatal errors seen so far in list order, which
22736 // lets us detect cascade stray tokens.
22737 size_t non_stray_min_start = SIZE_MAX;
22738
22739 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22740 size_t error_start = (size_t) error->location.start;
22741 size_t error_end = error_start + (size_t) error->location.length;
22742 bool at_eof = error_end >= source_length;
22743
22744 // Fatal errors are non-continuable unless they occur at EOF.
22745 if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
22746 parser->continuable = false;
22747 return;
22748 }
22749
22750 // Track non-stray, non-fatal error positions in list order.
22751 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
22752 error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
22753 if (error_start < non_stray_min_start) non_stray_min_start = error_start;
22754 continue;
22755 }
22756
22757 // This is a stray token. Determine if it is a cascade effect
22758 // of a preceding error or genuinely stray.
22759
22760 // Rule (a): a non-stray error was seen earlier in the list at a
22761 // strictly earlier position — this stray is a cascade effect.
22762 if (non_stray_min_start < error_start) continue;
22763
22764 // Rule (b): this stray is at EOF with valid code before it.
22765 // Single-byte stray tokens at EOF (like `\` for line continuation)
22766 // are likely truncated tokens. Multi-byte stray tokens (like the
22767 // keyword `end`) need additional evidence that they are cascade
22768 // effects (i.e. non-stray errors exist elsewhere).
22769 if (at_eof && error_start > 0) {
22770 // Exception: closing delimiters at EOF are genuinely stray.
22771 if (error->location.length == 1) {
22772 const uint8_t *byte = parser->start + error_start;
22773 if (*byte == ')' || *byte == ']' || *byte == '}') {
22774 parser->continuable = false;
22775 return;
22776 }
22777
22778 // Single-byte non-delimiter stray at EOF: cascade.
22779 continue;
22780 }
22781
22782 // Multi-byte stray at EOF: cascade only if there are
22783 // non-stray errors (evidence of a preceding parse failure).
22784 if (has_non_stray_error) continue;
22785 }
22786
22787 // Rule (c): a stray `=` at the start of a line could be the
22788 // beginning of an embedded document (`=begin`). The remaining
22789 // bytes after `=` parse as an identifier, so the error is not
22790 // at EOF, but the construct is genuinely incomplete.
22791 if (error->location.length == 1) {
22792 const uint8_t *byte = parser->start + error_start;
22793 if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
22794 }
22795
22796 // This stray token is genuinely non-continuable.
22797 parser->continuable = false;
22798 return;
22799 }
22800}
22801
22805pm_node_t *
22807 pm_node_t *node = parse_program(parser);
22808 pm_parse_continuable(parser);
22809 return node;
22810}
22811
22818pm_node_t *
22819pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
22820 bool eof = pm_source_stream_read(source);
22821
22822 pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22823 pm_node_t *node = pm_parse(tmp);
22824
22825 while (!eof && tmp->error_list.size > 0) {
22826 eof = pm_source_stream_read(source);
22827
22828 pm_parser_free(tmp);
22829 pm_arena_cleanup(arena);
22830
22831 tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22832 node = pm_parse(tmp);
22833 }
22834
22835 *parser = tmp;
22836 return node;
22837}
22838
22839#undef PM_CASE_KEYWORD
22840#undef PM_CASE_OPERATOR
22841#undef PM_CASE_WRITABLE
22842#undef PM_STRING_EMPTY
22843
22844// We optionally support serializing to a binary string. For systems that don't
22845// want or need this functionality, it can be turned off with the
22846// PRISM_EXCLUDE_SERIALIZATION define.
22847#ifndef PRISM_EXCLUDE_SERIALIZATION
22848
22849static PRISM_INLINE void
22850pm_serialize_header(pm_buffer_t *buffer) {
22851 pm_buffer_append_string(buffer, "PRISM", 5);
22852 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22853 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22854 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22855 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22856}
22857
22861void
22862pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22863 pm_serialize_header(buffer);
22864 pm_serialize_content(parser, node, buffer);
22865 pm_buffer_append_byte(buffer, '\0');
22866}
22867
22872void
22873pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22874 pm_options_t options = { 0 };
22875 pm_options_read(&options, data);
22876
22877 pm_arena_t arena = { 0 };
22878 pm_parser_t parser;
22879 pm_parser_init(&arena, &parser, source, size, &options);
22880
22881 pm_node_t *node = pm_parse(&parser);
22882
22883 pm_serialize_header(buffer);
22884 pm_serialize_content(&parser, node, buffer);
22885 pm_buffer_append_byte(buffer, '\0');
22886
22887 pm_parser_cleanup(&parser);
22888 pm_arena_cleanup(&arena);
22889 pm_options_cleanup(&options);
22890}
22891
22896void
22897pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
22898 pm_arena_t arena = { 0 };
22899 pm_parser_t *parser;
22900 pm_options_t options = { 0 };
22901 pm_options_read(&options, data);
22902
22903 pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
22904 pm_serialize_header(buffer);
22905 pm_serialize_content(parser, node, buffer);
22906 pm_buffer_append_byte(buffer, '\0');
22907
22908 pm_parser_free(parser);
22909 pm_arena_cleanup(&arena);
22910 pm_options_cleanup(&options);
22911}
22912
22916void
22917pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22918 pm_options_t options = { 0 };
22919 pm_options_read(&options, data);
22920
22921 pm_arena_t arena = { 0 };
22922 pm_parser_t parser;
22923 pm_parser_init(&arena, &parser, source, size, &options);
22924
22925 pm_parse(&parser);
22926 pm_serialize_header(buffer);
22927 pm_serialize_encoding(parser.encoding, buffer);
22928 pm_buffer_append_varsint(buffer, parser.start_line);
22929 pm_serialize_comment_list(&parser.comment_list, buffer);
22930
22931 pm_parser_cleanup(&parser);
22932 pm_arena_cleanup(&arena);
22933 pm_options_cleanup(&options);
22934}
22935
22936#endif
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition align.h:15
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition comments.h:18
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
A header file that defines macros to exclude certain features of the prism library.
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition fallthrough.h:15
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
int len
Length of the buffer.
Definition io.h:8
#define PRISM_INLINE
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition inline.h:12
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:96
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should not be frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:42
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:37
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:102
PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1)
Allocate and initialize a parser with the given start and end pointers.
Definition prism.c:22602
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1)
Free both the memory held by the given parser and the parser itself.
Definition prism.c:22635
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1)
Initiate the parser with the given parser.
Definition prism.c:22806
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:18
The version of the Prism library.
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:29
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:24
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:19
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:14
The functions related to serializing the AST to a binary format.
Functions for parsing streams.
AndNode.
Definition ast.h:1291
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1306
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1319
ArgumentsNode.
Definition ast.h:1351
pm_node_t base
The embedded base node.
Definition ast.h:1353
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1363
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1763
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1774
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1777
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1765
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1768
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1771
ArrayNode.
Definition ast.h:1381
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1390
ArrayPatternNode.
Definition ast.h:1441
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1459
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1499
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1509
AssocNode.
Definition ast.h:1524
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1555
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1542
BeginNode.
Definition ast.h:1647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1689
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1699
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1669
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1679
pm_node_t base
The embedded base node.
Definition ast.h:1649
This struct represents a set of binding powers used for a given token.
Definition prism.c:12445
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12453
pm_binding_power_t left
The left binding power.
Definition prism.c:12447
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12459
pm_binding_power_t right
The right binding power.
Definition prism.c:12450
BlockLocalVariableNode.
Definition ast.h:1764
BlockNode.
Definition ast.h:1791
BlockParametersNode.
Definition ast.h:1919
CallNode.
Definition ast.h:2143
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2204
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2224
pm_constant_id_t name
CallNode::name.
Definition ast.h:2184
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2214
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2237
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2174
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2194
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2247
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2161
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2600
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2610
CaseNode.
Definition ast.h:2647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2679
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2669
ClassVariableReadNode.
Definition ast.h:2936
ClassVariableTargetNode.
Definition ast.h:2964
ClassVariableWriteNode.
Definition ast.h:2986
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3195
ConstantPathTargetNode.
Definition ast.h:3330
ConstantReadNode.
Definition ast.h:3423
ConstantTargetNode.
Definition ast.h:3451
ConstantWriteNode.
Definition ast.h:3473
DefNode.
Definition ast.h:3535
pm_location_t equal_loc
DefNode::equal_loc.
Definition ast.h:3592
PM_NODE_ALIGNAS struct pm_node * body
DefNode::body.
Definition ast.h:3562
ElseNode.
Definition ast.h:3649
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3661
EnsureNode.
Definition ast.h:3744
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3756
FindPatternNode.
Definition ast.h:3823
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3887
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3835
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3900
FlipFlopNode.
Definition ast.h:3918
FloatNode.
Definition ast.h:3950
double value
FloatNode::value.
Definition ast.h:3959
pm_node_t base
The embedded base node.
Definition ast.h:3952
ForwardingParameterNode.
Definition ast.h:4083
GlobalVariableReadNode.
Definition ast.h:4256
GlobalVariableTargetNode.
Definition ast.h:4284
GlobalVariableWriteNode.
Definition ast.h:4306
HashNode.
Definition ast.h:4367
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4392
HashPatternNode.
Definition ast.h:4426
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4441
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4480
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4493
IfNode.
Definition ast.h:4514
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4573
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4592
ImaginaryNode.
Definition ast.h:4619
InNode.
Definition ast.h:4695
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4707
InstanceVariableReadNode.
Definition ast.h:5098
InstanceVariableTargetNode.
Definition ast.h:5126
InstanceVariableWriteNode.
Definition ast.h:5148
IntegerNode.
Definition ast.h:5215
pm_integer_t value
IntegerNode::value.
Definition ast.h:5224
pm_node_t base
The embedded base node.
Definition ast.h:5217
bool negative
Whether or not the integer is negative.
Definition integer.h:38
InterpolatedMatchLastLineNode.
Definition ast.h:5252
InterpolatedRegularExpressionNode.
Definition ast.h:5297
InterpolatedStringNode.
Definition ast.h:5333
pm_node_t base
The embedded base node.
Definition ast.h:5335
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5340
InterpolatedSymbolNode.
Definition ast.h:5365
InterpolatedXStringNode.
Definition ast.h:5397
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5404
pm_node_t base
The embedded base node.
Definition ast.h:5399
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5409
KeywordHashNode.
Definition ast.h:5466
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
LocalVariableReadNode.
Definition ast.h:5702
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5732
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5719
LocalVariableTargetNode.
Definition ast.h:5750
LocalVariableWriteNode.
Definition ast.h:5777
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5803
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5790
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:554
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:556
uint32_t length
The length of the location.
Definition ast.h:559
MatchLastLineNode.
Definition ast.h:5868
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6035
MultiTargetNode.
Definition ast.h:6102
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6159
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6119
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6169
MultiWriteNode.
Definition ast.h:6184
A list of nodes in the source, most often used for lists of children.
Definition ast.h:567
size_t size
The number of nodes in the list.
Definition ast.h:569
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:575
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1065
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1070
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1088
OptionalParameterNode.
Definition ast.h:6478
OrNode.
Definition ast.h:6515
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6543
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6530
ParametersNode.
Definition ast.h:6569
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6606
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6586
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6601
ParenthesesNode.
Definition ast.h:6624
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6631
RangeNode.
Definition ast.h:6854
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6883
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6869
RationalNode.
Definition ast.h:6911
pm_node_t base
The embedded base node.
Definition ast.h:6913
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6922
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9735
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9740
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9737
RegularExpressionNode.
Definition ast.h:6976
RequiredParameterNode.
Definition ast.h:7048
RescueModifierNode.
Definition ast.h:7070
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7087
RescueNode.
Definition ast.h:7107
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7144
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7134
SplatNode.
Definition ast.h:7397
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7409
StatementsNode.
Definition ast.h:7424
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7431
pm_node_t base
The embedded base node.
Definition ast.h:7426
StringNode.
Definition ast.h:7458
pm_node_t base
The embedded base node.
Definition ast.h:7460
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7480
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7470
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7475
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7465
A generic string type that can have various ownership semantics.
Definition stringy.h:18
const uint8_t * source
A pointer to the start of the string.
Definition stringy.h:20
size_t length
The length of the string in bytes of memory.
Definition stringy.h:23
enum pm_string_t::@110 type
The type of the string.
SuperNode.
Definition ast.h:7500
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
SuperNode::arguments.
Definition ast.h:7519
pm_location_t lparen_loc
SuperNode::lparen_loc.
Definition ast.h:7512
PM_NODE_ALIGNAS struct pm_node * block
SuperNode::block.
Definition ast.h:7529
SymbolNode.
Definition ast.h:7552
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7564
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7574
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated with the tokens.
Definition prism.c:9709
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9714
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9720
This struct represents a token in the Ruby source.
Definition ast.h:526
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:534
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:531
pm_token_type_t type
The type of the token.
Definition ast.h:528
UndefNode.
Definition ast.h:7606
UnlessNode.
Definition ast.h:7636
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7685
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7695
WhenNode.
Definition ast.h:7770
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7792
XStringNode.
Definition ast.h:7859
YieldNode.
Definition ast.h:7896
pm_location_t lparen_loc
YieldNode::lparen_loc.
Definition ast.h:7908
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
YieldNode::arguments.
Definition ast.h:7913
#define PRISM_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition unused.h:13