Ruby 4.1.0dev (2026-05-26 revision d6fa8e3e0f876114bf4ab9c8961a8e10e32ac9db)
prism.c
4
5#include "prism/internal/allocator.h"
6#include "prism/internal/arena.h"
7#include "prism/internal/bit.h"
8#include "prism/internal/buffer.h"
9#include "prism/internal/char.h"
10#include "prism/internal/comments.h"
11#include "prism/internal/constant_pool.h"
12#include "prism/internal/diagnostic.h"
13#include "prism/internal/encoding.h"
14#include "prism/internal/integer.h"
15#include "prism/internal/isinf.h"
16#include "prism/internal/line_offset_list.h"
17#include "prism/internal/list.h"
18#include "prism/internal/magic_comments.h"
19#include "prism/internal/memchr.h"
20#include "prism/internal/node.h"
21#include "prism/internal/options.h"
22#include "prism/internal/parser.h"
23#include "prism/internal/regexp.h"
24#include "prism/internal/serialize.h"
25#include "prism/internal/source.h"
26#include "prism/internal/static_literals.h"
27#include "prism/internal/stringy.h"
28#include "prism/internal/strncasecmp.h"
29#include "prism/internal/strpbrk.h"
30#include "prism/internal/tokens.h"
31
32#include "prism/excludes.h"
33#include "prism/serialize.h"
34#include "prism/stream.h"
35#include "prism/version.h"
36
37#include <assert.h>
38#include <errno.h>
39#include <limits.h>
40#include <locale.h>
41#include <math.h>
42#include <stdio.h>
43#include <stdlib.h>
44
/**
 * Default for PRISM_DEPTH_MAXIMUM. A build may define the macro before this
 * point to override it; otherwise it is 10000. (Presumably the deepest nesting
 * the parser will accept; confirm at the use sites.)
 */
#if !defined(PRISM_DEPTH_MAXIMUM)
#define PRISM_DEPTH_MAXIMUM 10000
#endif
53
/**
 * Paste two preprocessing tokens together into a single token.
 */
#define PM_CONCATENATE(prefix_, suffix_) prefix_ ## suffix_
59
/**
 * Compile-time assertion.
 *
 * `_Static_assert` is a keyword in C11, not a macro, so testing only
 * `defined(_Static_assert)` never selects the native form on a conforming C11
 * compiler. We therefore also check the language version. The macro test is
 * kept for C libraries that provide `_Static_assert` as a macro in older
 * language modes.
 *
 * When neither is available we fall back to a typedef of a char array whose
 * size is negative (and therefore ill-formed) if the condition is false. The
 * `line` argument only serves to give that typedef a unique name and `message`
 * is ignored in the fallback.
 */
#if defined(_Static_assert) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#else
#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
70
/**
 * Branch prediction hints. On GCC and Clang these wrap __builtin_expect; on
 * every other compiler they expand to the bare condition.
 */
#if !defined(__GNUC__) && !defined(__clang__)
    /** The condition is expected to be true (no hint available). */
    #define PRISM_LIKELY(condition_) (condition_)

    /** The condition is expected to be false (no hint available). */
    #define PRISM_UNLIKELY(condition_) (condition_)
#else
    /** The condition is expected to be true. */
    #define PRISM_LIKELY(condition_) __builtin_expect(!!(condition_), 1)

    /** The condition is expected to be false. */
    #define PRISM_UNLIKELY(condition_) __builtin_expect(!!(condition_), 0)
#endif
88
92const char *
93pm_version(void) {
94 return PRISM_VERSION;
95}
96
/**
 * Width assigned to a tab character.
 * NOTE(review): presumably the number of columns a tab advances to when
 * measuring leading whitespace; confirm at the use sites.
 */
#define PM_TAB_WHITESPACE_SIZE 8
102
// Minimum and maximum of two values. Each argument may be evaluated twice, so
// do not pass expressions with side effects.
#define MIN(lhs_, rhs_) (((lhs_) < (rhs_)) ? (lhs_) : (rhs_))
#define MAX(lhs_, rhs_) (((lhs_) > (rhs_)) ? (lhs_) : (rhs_))
106
107/******************************************************************************/
108/* Helpful AST-related macros */
109/******************************************************************************/
110
/** Cast a value to uint32_t. */
#define U32(value_) ((uint32_t) (value_))

/** Short aliases for the node flag and node upcast macros. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

/** Start offset and one-past-the-end offset of a location (start + length). */
#define PM_LOCATION_START(location_) ((location_)->start)
#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)

/**
 * Token measurements. Start and end are byte offsets of the token's pointers
 * relative to the parser's start pointer. Lengths are pointer differences. All
 * are narrowed to uint32_t.
 */
#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)

/**
 * Node measurements, read from the location of the upcast node.
 * PM_NODE_START and PM_NODE_LENGTH expand to lvalues and are assigned to by
 * the setter macros below.
 */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))

/** Length of a span that starts at a token/node and ends at a node/token. */
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))

/**
 * Setters. The START variants overwrite a node's start offset. The LENGTH
 * variants overwrite a node's length so that it ends where the given node,
 * token, or location ends, measured from the node's current start.
 */
#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))

/**
 * Builders for pm_location_t values (compound literals), from explicit
 * start/length, from a token, or from a node's existing location.
 */
#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location

/** Builders for a location spanning from a left element to a right element. */
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

/**
 * Shorthands. TOK2LOC converts a token pointer to a location. NTOK2LOC does
 * the same but yields the unset location for a NULL pointer. NTOK2PTR takes a
 * token by value and yields NULL when its start pointer is NULL, otherwise its
 * address.
 */
#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
151
152/******************************************************************************/
153/* Lex mode manipulations */
154/******************************************************************************/
155
160static PRISM_INLINE uint8_t
161lex_mode_incrementor(const uint8_t start) {
162 switch (start) {
163 case '(':
164 case '[':
165 case '{':
166 case '<':
167 return start;
168 default:
169 return '\0';
170 }
171}
172
177static PRISM_INLINE uint8_t
178lex_mode_terminator(const uint8_t start) {
179 switch (start) {
180 case '(':
181 return ')';
182 case '[':
183 return ']';
184 case '{':
185 return '}';
186 case '<':
187 return '>';
188 default:
189 return start;
190 }
191}
192
198static bool
199lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
200 lex_mode.prev = parser->lex_modes.current;
201 parser->lex_modes.index++;
202
203 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
204 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
205 if (parser->lex_modes.current == NULL) return false;
206
207 *parser->lex_modes.current = lex_mode;
208 } else {
209 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
210 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
211 }
212
213 return true;
214}
215
219static PRISM_INLINE bool
220lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
221 uint8_t incrementor = lex_mode_incrementor(delimiter);
222 uint8_t terminator = lex_mode_terminator(delimiter);
223
224 pm_lex_mode_t lex_mode = {
225 .mode = PM_LEX_LIST,
226 .as.list = {
227 .nesting = 0,
228 .interpolation = interpolation,
229 .incrementor = incrementor,
230 .terminator = terminator
231 }
232 };
233
234 // These are the places where we need to split up the content of the list.
235 // We'll use strpbrk to find the first of these characters.
236 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
237 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
238 memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
239 size_t index = 7;
240
241 // Now we'll add the terminator to the list of breakpoints. If the
242 // terminator is not already a NULL byte, add it to the list.
243 if (terminator != '\0') {
244 breakpoints[index++] = terminator;
245 }
246
247 // If interpolation is allowed, then we're going to check for the #
248 // character. Otherwise we'll only look for escapes and the terminator.
249 if (interpolation) {
250 breakpoints[index++] = '#';
251 }
252
253 // If there is an incrementor, then we'll check for that as well.
254 if (incrementor != '\0') {
255 breakpoints[index++] = incrementor;
256 }
257
258 parser->explicit_encoding = NULL;
259 return lex_mode_push(parser, lex_mode);
260}
261
267static PRISM_INLINE bool
268lex_mode_push_list_eof(pm_parser_t *parser) {
269 return lex_mode_push_list(parser, false, '\0');
270}
271
275static PRISM_INLINE bool
276lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
277 pm_lex_mode_t lex_mode = {
278 .mode = PM_LEX_REGEXP,
279 .as.regexp = {
280 .nesting = 0,
281 .incrementor = incrementor,
282 .terminator = terminator
283 }
284 };
285
286 // These are the places where we need to split up the content of the
287 // regular expression. We'll use strpbrk to find the first of these
288 // characters.
289 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
290 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
291 memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
292 size_t index = 4;
293
294 // First we'll add the terminator.
295 if (terminator != '\0') {
296 breakpoints[index++] = terminator;
297 }
298
299 // Next, if there is an incrementor, then we'll check for that as well.
300 if (incrementor != '\0') {
301 breakpoints[index++] = incrementor;
302 }
303
304 parser->explicit_encoding = NULL;
305 return lex_mode_push(parser, lex_mode);
306}
307
311static PRISM_INLINE bool
312lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
313 pm_lex_mode_t lex_mode = {
314 .mode = PM_LEX_STRING,
315 .as.string = {
316 .nesting = 0,
317 .interpolation = interpolation,
318 .label_allowed = label_allowed,
319 .incrementor = incrementor,
320 .terminator = terminator
321 }
322 };
323
324 // These are the places where we need to split up the content of the
325 // string. We'll use strpbrk to find the first of these characters.
326 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
327 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
328 memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
329 size_t index = 3;
330
331 // Now add in the terminator. If the terminator is not already a NULL byte,
332 // then we'll add it.
333 if (terminator != '\0') {
334 breakpoints[index++] = terminator;
335 }
336
337 // If interpolation is allowed, then we're going to check for the #
338 // character. Otherwise we'll only look for escapes and the terminator.
339 if (interpolation) {
340 breakpoints[index++] = '#';
341 }
342
343 // If we have an incrementor, then we'll add that in as a breakpoint as
344 // well.
345 if (incrementor != '\0') {
346 breakpoints[index++] = incrementor;
347 }
348
349 parser->explicit_encoding = NULL;
350 return lex_mode_push(parser, lex_mode);
351}
352
358static PRISM_INLINE bool
359lex_mode_push_string_eof(pm_parser_t *parser) {
360 return lex_mode_push_string(parser, false, false, '\0', '\0');
361}
362
368static void
369lex_mode_pop(pm_parser_t *parser) {
370 if (parser->lex_modes.index == 0) {
371 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
372 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
373 parser->lex_modes.index--;
374 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
375 } else {
376 parser->lex_modes.index--;
377 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
378 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
379 parser->lex_modes.current = prev;
380 }
381}
382
386static PRISM_INLINE bool
387lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
388 return parser->lex_state & state;
389}
390
/**
 * The classification returned by lex_state_ignored_p.
 */
typedef enum {
    /** Neither of the two conditions below holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** A BEG, CLASS, FNAME, or DOT bit is set and LABELED is not. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** Ignoring the LABEL bit, the state is exactly ARG | LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
396
397static PRISM_INLINE pm_ignored_newline_type_t
398lex_state_ignored_p(pm_parser_t *parser) {
399 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
400
401 if (ignored) {
402 return PM_IGNORED_NEWLINE_ALL;
403 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
404 return PM_IGNORED_NEWLINE_PATTERN;
405 } else {
406 return PM_IGNORED_NEWLINE_NONE;
407 }
408}
409
410static PRISM_INLINE bool
411lex_state_beg_p(pm_parser_t *parser) {
412 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
413}
414
415static PRISM_INLINE bool
416lex_state_arg_p(pm_parser_t *parser) {
417 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
418}
419
420static PRISM_INLINE bool
421lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
422 if (parser->current.end >= parser->end) {
423 return false;
424 }
425 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
426}
427
428static PRISM_INLINE bool
429lex_state_end_p(pm_parser_t *parser) {
430 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
431}
432
436static PRISM_INLINE bool
437lex_state_operator_p(pm_parser_t *parser) {
438 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
439}
440
/**
 * Replace the parser's lex state with the given state.
 *
 * @param parser The parser to update.
 * @param state The new lex state.
 */
static PRISM_INLINE void
lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
    parser->lex_state = state;
}
449
/**
 * Lex state debug logging switch. It defaults to 0 (off) unless the build
 * defines it beforehand.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
457
#if PM_DEBUG_LOGGING
/**
 * Print the parser's lex state to stderr as "STATE: " followed by either
 * "NONE" or the names of the set state bits joined with "|", then a newline.
 */
PRISM_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Each entry pairs a state bit with its spelled-out name, in the order
    // the names are printed.
#define STATE_ENTRY(state_) { state_, #state_ }
    static const struct {
        pm_lex_state_t flag;
        const char *label;
    } entries[] = {
        STATE_ENTRY(PM_LEX_STATE_BEG),
        STATE_ENTRY(PM_LEX_STATE_END),
        STATE_ENTRY(PM_LEX_STATE_ENDARG),
        STATE_ENTRY(PM_LEX_STATE_ENDFN),
        STATE_ENTRY(PM_LEX_STATE_ARG),
        STATE_ENTRY(PM_LEX_STATE_CMDARG),
        STATE_ENTRY(PM_LEX_STATE_MID),
        STATE_ENTRY(PM_LEX_STATE_FNAME),
        STATE_ENTRY(PM_LEX_STATE_DOT),
        STATE_ENTRY(PM_LEX_STATE_CLASS),
        STATE_ENTRY(PM_LEX_STATE_LABEL),
        STATE_ENTRY(PM_LEX_STATE_LABELED),
        STATE_ENTRY(PM_LEX_STATE_FITEM)
    };
#undef STATE_ENTRY

    bool first = true;
    for (size_t position = 0; position < sizeof(entries) / sizeof(entries[0]); position++) {
        if (parser->lex_state & entries[position].flag) {
            if (!first) fprintf(stderr, "|");
            fprintf(stderr, "%s", entries[position].label);
            first = false;
        }
    }

    fprintf(stderr, "\n");
}

/**
 * Set the lex state while logging the calling function and line, the state
 * before the change, and the state after it to stderr.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

/**
 * From here on, every lex_state_set call is routed through the logging
 * variant, tagged with the calling function and line.
 */
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
507
508/******************************************************************************/
509/* Command-line macro helpers */
510/******************************************************************************/
511
/** Yield the bits of the parser's command_line field selected by the option. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser_, option_) ((parser_)->command_line & (option_))

/** Test the PM_OPTIONS_COMMAND_LINE_A bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_A)

/** Test the PM_OPTIONS_COMMAND_LINE_E bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_E)

/** Test the PM_OPTIONS_COMMAND_LINE_L bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_L)

/** Test the PM_OPTIONS_COMMAND_LINE_N bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_N)

/** Test the PM_OPTIONS_COMMAND_LINE_P bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_P)

/** Test the PM_OPTIONS_COMMAND_LINE_X bit. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser_) PM_PARSER_COMMAND_LINE_OPTION(parser_, PM_OPTIONS_COMMAND_LINE_X)
532
533/******************************************************************************/
534/* Diagnostic-related functions */
535/******************************************************************************/
536
540static PRISM_INLINE void
541pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
542 pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
543}
544
549static PRISM_INLINE void
550pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
551 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
552}
553
558static PRISM_INLINE void
559pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
560 pm_parser_err_token(parser, &parser->current, diag_id);
561}
562
567static PRISM_INLINE void
568pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
569 pm_parser_err_token(parser, &parser->previous, diag_id);
570}
571
576static PRISM_INLINE void
577pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
578 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
579}
580
/**
 * Append a formatted error with the given source range to the parser's error
 * list. The variadic arguments are forwarded to the diagnostic formatter.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted error covering the given node's location.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted error covering the given node's location, passing the
 * node's length and a pointer to its source text as the format arguments.
 * The parser argument is parenthesized so that any pointer expression may be
 * passed, as with the other macros in this family.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))

/**
 * Append a formatted error covering the given token.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted error covering the given token, passing the token's
 * length and a pointer to its source text as the format arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
614
618static PRISM_INLINE void
619pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
620 pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
621}
622
627static PRISM_INLINE void
628pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
629 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
630}
631
636static PRISM_INLINE void
637pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
638 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
639}
640
/**
 * Append a formatted warning with the given source range to the parser's
 * warning list. The variadic arguments are forwarded to the diagnostic
 * formatter.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning covering the given token.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning covering the given token, passing the token's
 * length and a pointer to its source text as the format arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)

/**
 * Append a formatted warning covering the given node's location.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
668
674static void
675pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
676 PM_PARSER_ERR_FORMAT(
677 parser,
678 U32(ident_start - parser->start),
679 U32(ident_length),
680 PM_ERR_HEREDOC_TERM,
681 (int) ident_length,
682 (const char *) ident_start
683 );
684}
685
686/******************************************************************************/
687/* Scope-related functions */
688/******************************************************************************/
689
693static bool
694pm_parser_scope_push(pm_parser_t *parser, bool closed) {
695 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
696 if (scope == NULL) return false;
697
698 *scope = (pm_scope_t) {
699 .previous = parser->current_scope,
700 .locals = { 0 },
701 .parameters = PM_SCOPE_PARAMETERS_NONE,
702 .implicit_parameters = { 0 },
703 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
704 .closed = closed
705 };
706
707 parser->current_scope = scope;
708 return true;
709}
710
715static bool
716pm_parser_scope_toplevel_p(pm_parser_t *parser) {
717 pm_scope_t *scope = parser->current_scope;
718
719 do {
720 if (scope->previous == NULL) return true;
721 if (scope->closed) return false;
722 } while ((scope = scope->previous) != NULL);
723
724 assert(false && "unreachable");
725 return true;
726}
727
731static pm_scope_t *
732pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
733 pm_scope_t *scope = parser->current_scope;
734
735 while (depth-- > 0) {
736 assert(scope != NULL);
737 scope = scope->previous;
738 }
739
740 return scope;
741}
742
/**
 * The outcomes of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** A closed scope has the parameter and no scope before it did. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** A closed scope has the parameter, but so did a scope before it. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** No closed scope with the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
748
749static pm_scope_forwarding_param_check_result_t
750pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
751 pm_scope_t *scope = parser->current_scope;
752 bool conflict = false;
753
754 while (scope != NULL) {
755 if (scope->parameters & mask) {
756 if (scope->closed) {
757 if (conflict) {
758 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
759 } else {
760 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
761 }
762 }
763
764 conflict = true;
765 }
766
767 if (scope->closed) break;
768 scope = scope->previous;
769 }
770
771 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
772}
773
774static void
775pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
776 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
777 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
778 // Pass.
779 break;
780 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
781 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
782 break;
783 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
784 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
785 break;
786 }
787}
788
789static void
790pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
791 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
792 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
793 // Pass.
794 break;
795 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
796 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
797 break;
798 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
799 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800 break;
801 }
802}
803
804static void
805pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
806 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
807 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
808 // Pass.
809 break;
810 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
811 // This shouldn't happen, because ... is not allowed in the
812 // declaration of blocks. If we get here, we assume we already have
813 // an error for this.
814 break;
815 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
816 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
817 break;
818 }
819}
820
821static void
822pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
823 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
824 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
825 // Pass.
826 break;
827 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
828 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
829 break;
830 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
831 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832 break;
833 }
834}
835
839static PRISM_INLINE pm_shareable_constant_value_t
840pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
841 return parser->current_scope->shareable_constant;
842}
843
848static void
849pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
850 pm_scope_t *scope = parser->current_scope;
851
852 do {
853 scope->shareable_constant = shareable_constant;
854 } while (!scope->closed && (scope = scope->previous) != NULL);
855}
856
857/******************************************************************************/
858/* Local variable-related functions */
859/******************************************************************************/
860
/**
 * Capacity at which a locals set switches representation. Sets whose capacity
 * is below this value are scanned linearly as a list; sets at or above it are
 * probed as a hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 5
865
866static void
867pm_locals_free(pm_locals_t *locals) {
868 if (locals->capacity > 0) {
869 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
870 }
871}
872
877static uint32_t
878pm_locals_hash(pm_constant_id_t name) {
879 name = ((name >> 16) ^ name) * 0x45d9f3b;
880 name = ((name >> 16) ^ name) * 0x45d9f3b;
881 name = (name >> 16) ^ name;
882 return name;
883}
884
889static void
890pm_locals_resize(pm_locals_t *locals) {
891 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
892 assert(next_capacity > locals->capacity);
893
894 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
895 if (next_locals == NULL) abort();
896
897 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
898 if (locals->size > 0) {
899 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
900 }
901 } else {
902 // If we just switched from a list to a hash, then we need to fill in
903 // the hash values of all of the locals.
904 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
905 uint32_t mask = next_capacity - 1;
906
907 for (uint32_t index = 0; index < locals->capacity; index++) {
908 pm_local_t *local = &locals->locals[index];
909
910 if (local->name != PM_CONSTANT_ID_UNSET) {
911 if (hash_needed) local->hash = pm_locals_hash(local->name);
912
913 uint32_t hash = local->hash;
914 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
915 next_locals[hash & mask] = *local;
916 }
917 }
918 }
919
920 pm_locals_free(locals);
921 locals->locals = next_locals;
922 locals->capacity = next_capacity;
923}
924
940static bool
941pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
942 if (locals->size >= (locals->capacity / 4 * 3)) {
943 pm_locals_resize(locals);
944 }
945
946 locals->bloom |= (1u << (name & 31));
947
948 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
949 for (uint32_t index = 0; index < locals->capacity; index++) {
950 pm_local_t *local = &locals->locals[index];
951
952 if (local->name == PM_CONSTANT_ID_UNSET) {
953 *local = (pm_local_t) {
954 .name = name,
955 .location = { .start = start, .length = length },
956 .index = locals->size++,
957 .reads = reads,
958 .hash = 0
959 };
960 return true;
961 } else if (local->name == name) {
962 return false;
963 }
964 }
965 } else {
966 uint32_t mask = locals->capacity - 1;
967 uint32_t hash = pm_locals_hash(name);
968 uint32_t initial_hash = hash;
969
970 do {
971 pm_local_t *local = &locals->locals[hash & mask];
972
973 if (local->name == PM_CONSTANT_ID_UNSET) {
974 *local = (pm_local_t) {
975 .name = name,
976 .location = { .start = start, .length = length },
977 .index = locals->size++,
978 .reads = reads,
979 .hash = initial_hash
980 };
981 return true;
982 } else if (local->name == name) {
983 return false;
984 } else {
985 hash++;
986 }
987 } while ((hash & mask) != initial_hash);
988 }
989
990 assert(false && "unreachable");
991 return true;
992}
993
998static uint32_t
999pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
1000 if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
1001
1002 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
1003 for (uint32_t index = 0; index < locals->size; index++) {
1004 pm_local_t *local = &locals->locals[index];
1005 if (local->name == name) return index;
1006 }
1007 } else {
1008 uint32_t mask = locals->capacity - 1;
1009 uint32_t hash = pm_locals_hash(name);
1010 uint32_t initial_hash = hash & mask;
1011
1012 do {
1013 pm_local_t *local = &locals->locals[hash & mask];
1014
1015 if (local->name == PM_CONSTANT_ID_UNSET) {
1016 return UINT32_MAX;
1017 } else if (local->name == name) {
1018 return hash & mask;
1019 } else {
1020 hash++;
1021 }
1022 } while ((hash & mask) != initial_hash);
1023 }
1024
1025 return UINT32_MAX;
1026}
1027
1032static void
1033pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
1034 uint32_t index = pm_locals_find(locals, name);
1035 assert(index != UINT32_MAX);
1036
1037 pm_local_t *local = &locals->locals[index];
1038 assert(local->reads < UINT32_MAX);
1039
1040 local->reads++;
1041}
1042
1047static void
1048pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
1049 uint32_t index = pm_locals_find(locals, name);
1050 assert(index != UINT32_MAX);
1051
1052 pm_local_t *local = &locals->locals[index];
1053 assert(local->reads > 0);
1054
1055 local->reads--;
1056}
1057
1061static uint32_t
1062pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
1063 uint32_t index = pm_locals_find(locals, name);
1064 assert(index != UINT32_MAX);
1065
1066 return locals->locals[index].reads;
1067}
1068
1077static void
1078pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
1079 pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
1080
1081 // If we're still below the threshold for switching to a hash, then we only
1082 // need to loop over the locals until we hit the size because the locals are
1083 // stored in a list.
1084 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
1085
1086 // We will only warn for unused variables if we're not at the top level, or
1087 // if we're parsing a file outside of eval or -e.
1088 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
1089
1090 for (uint32_t index = 0; index < capacity; index++) {
1091 pm_local_t *local = &locals->locals[index];
1092
1093 if (local->name != PM_CONSTANT_ID_UNSET) {
1094 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1095
1096 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1097 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1098
1099 if (constant->length >= 1 && *constant->start != '_') {
1100 PM_PARSER_WARN_FORMAT(
1101 parser,
1102 local->location.start,
1103 local->location.length,
1104 PM_WARN_UNUSED_LOCAL_VARIABLE,
1105 (int) constant->length,
1106 (const char *) constant->start
1107 );
1108 }
1109 }
1110 }
1111 }
1112}
1113
1114/******************************************************************************/
1115/* Node-related functions */
1116/******************************************************************************/
1117
1122pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1123 /* Fast path: if this is the same token as the last lookup (same pointer
1124 * range), return the cached result. */
1125 if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
1126 return parser->constant_cache.id;
1127 }
1128
1129 pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
1130
1131 parser->constant_cache.start = start;
1132 parser->constant_cache.end = end;
1133 parser->constant_cache.id = id;
1134
1135 return id;
1136}
1137
1142pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1143 return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
1144}
1145
1150pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1151 return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
1152}
1153
1158pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1159 return pm_parser_constant_id_raw(parser, token->start, token->end);
1160}
1161
/**
 * Case labels for every node type that pm_check_value_expression treats as a
 * void value expression. It is meant to be written directly after the `case`
 * keyword and followed by a colon (`case PM_CASE_VOID_VALUE:`), which is why
 * the expansion starts with a bare constant and ends without a colon.
 */
#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
    case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
1168
1174static pm_node_t *
1175pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1176 pm_node_t *void_node = NULL;
1177
1178 while (node != NULL) {
1179 switch (PM_NODE_TYPE(node)) {
1180 case PM_CASE_VOID_VALUE:
1181 return void_node != NULL ? void_node : node;
1182 case PM_MATCH_PREDICATE_NODE:
1183 return NULL;
1184 case PM_BEGIN_NODE: {
1185 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
1187 if (cast->ensure_clause != NULL) {
1188 if (cast->rescue_clause != NULL) {
1189 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1190 if (vn != NULL) return vn;
1191 }
1192
1193 if (cast->statements != NULL) {
1194 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1195 if (vn != NULL) return vn;
1196 }
1197
1198 node = UP(cast->ensure_clause);
1199 } else if (cast->rescue_clause != NULL) {
1200 // https://bugs.ruby-lang.org/issues/21669
1201 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1202 if (cast->statements == NULL) return NULL;
1203
1204 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1205 if (vn == NULL) return NULL;
1206 if (void_node == NULL) void_node = vn;
1207 }
1208
1209 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1210 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1211
1212 if (vn == NULL) {
1213 // https://bugs.ruby-lang.org/issues/21669
1214 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1215 return NULL;
1216 }
1217 void_node = NULL;
1218 break;
1219 }
1220 }
1221
1222 if (cast->else_clause != NULL) {
1223 node = UP(cast->else_clause);
1224
1225 // https://bugs.ruby-lang.org/issues/21669
1226 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1227 pm_node_t *vn = pm_check_value_expression(parser, node);
1228 if (vn != NULL) return vn;
1229 }
1230 } else {
1231 return void_node;
1232 }
1233 } else {
1234 node = UP(cast->statements);
1235 }
1236
1237 break;
1238 }
1239 case PM_CASE_NODE: {
1240 // https://bugs.ruby-lang.org/issues/21669
1241 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1242 return NULL;
1243 }
1244
1245 pm_case_node_t *cast = (pm_case_node_t *) node;
1246 if (cast->else_clause == NULL) return NULL;
1247
1248 pm_node_t *condition;
1249 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1250 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1251
1252 pm_when_node_t *cast = (pm_when_node_t *) condition;
1253 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1254 if (vn == NULL) return NULL;
1255 if (void_node == NULL) void_node = vn;
1256 }
1257
1258 node = UP(cast->else_clause);
1259 break;
1260 }
1261 case PM_CASE_MATCH_NODE: {
1262 // https://bugs.ruby-lang.org/issues/21669
1263 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1264 return NULL;
1265 }
1266
1268 if (cast->else_clause == NULL) return NULL;
1269
1270 pm_node_t *condition;
1271 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1272 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1273
1274 pm_in_node_t *cast = (pm_in_node_t *) condition;
1275 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1276 if (vn == NULL) return NULL;
1277 if (void_node == NULL) void_node = vn;
1278 }
1279
1280 node = UP(cast->else_clause);
1281 break;
1282 }
1283 case PM_ENSURE_NODE: {
1284 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1285 node = UP(cast->statements);
1286 break;
1287 }
1288 case PM_PARENTHESES_NODE: {
1290 node = UP(cast->body);
1291 break;
1292 }
1293 case PM_STATEMENTS_NODE: {
1295
1296 // https://bugs.ruby-lang.org/issues/21669
1297 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1298 pm_node_t *body_part;
1299 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1300 switch (PM_NODE_TYPE(body_part)) {
1301 case PM_CASE_VOID_VALUE:
1302 if (void_node == NULL) {
1303 void_node = body_part;
1304 }
1305 return void_node;
1306 default: break;
1307 }
1308 }
1309 }
1310
1311 node = cast->body.nodes[cast->body.size - 1];
1312 break;
1313 }
1314 case PM_IF_NODE: {
1315 pm_if_node_t *cast = (pm_if_node_t *) node;
1316 if (cast->statements == NULL || cast->subsequent == NULL) {
1317 return NULL;
1318 }
1319 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1320 if (vn == NULL) {
1321 return NULL;
1322 }
1323 if (void_node == NULL) {
1324 void_node = vn;
1325 }
1326 node = cast->subsequent;
1327 break;
1328 }
1329 case PM_UNLESS_NODE: {
1330 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1331 if (cast->statements == NULL || cast->else_clause == NULL) {
1332 return NULL;
1333 }
1334 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1335 if (vn == NULL) {
1336 return NULL;
1337 }
1338 if (void_node == NULL) {
1339 void_node = vn;
1340 }
1341 node = UP(cast->else_clause);
1342 break;
1343 }
1344 case PM_ELSE_NODE: {
1345 pm_else_node_t *cast = (pm_else_node_t *) node;
1346 node = UP(cast->statements);
1347 break;
1348 }
1349 case PM_AND_NODE: {
1350 pm_and_node_t *cast = (pm_and_node_t *) node;
1351 node = cast->left;
1352 break;
1353 }
1354 case PM_OR_NODE: {
1355 pm_or_node_t *cast = (pm_or_node_t *) node;
1356 node = cast->left;
1357 break;
1358 }
1359 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1361
1362 pm_scope_t *scope = parser->current_scope;
1363 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1364
1365 pm_locals_read(&scope->locals, cast->name);
1366 return NULL;
1367 }
1368 default:
1369 return NULL;
1370 }
1371 }
1372
1373 return NULL;
1374}
1375
1376static PRISM_INLINE void
1377pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1378 pm_node_t *void_node = pm_check_value_expression(parser, node);
1379 if (void_node != NULL) {
1380 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1381 }
1382}
1383
1387static void
1388pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1389 const char *type = NULL;
1390 int length = 0;
1391
1392 switch (PM_NODE_TYPE(node)) {
1393 case PM_BACK_REFERENCE_READ_NODE:
1394 case PM_CLASS_VARIABLE_READ_NODE:
1395 case PM_GLOBAL_VARIABLE_READ_NODE:
1396 case PM_INSTANCE_VARIABLE_READ_NODE:
1397 case PM_LOCAL_VARIABLE_READ_NODE:
1398 case PM_NUMBERED_REFERENCE_READ_NODE:
1399 type = "a variable";
1400 length = 10;
1401 break;
1402 case PM_CALL_NODE: {
1403 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1404 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1405
1406 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1407 switch (message->length) {
1408 case 1:
1409 switch (message->start[0]) {
1410 case '+':
1411 case '-':
1412 case '*':
1413 case '/':
1414 case '%':
1415 case '|':
1416 case '^':
1417 case '&':
1418 case '>':
1419 case '<':
1420 type = (const char *) message->start;
1421 length = 1;
1422 break;
1423 }
1424 break;
1425 case 2:
1426 switch (message->start[1]) {
1427 case '=':
1428 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1429 type = (const char *) message->start;
1430 length = 2;
1431 }
1432 break;
1433 case '@':
1434 if (message->start[0] == '+' || message->start[0] == '-') {
1435 type = (const char *) message->start;
1436 length = 2;
1437 }
1438 break;
1439 case '*':
1440 if (message->start[0] == '*') {
1441 type = (const char *) message->start;
1442 length = 2;
1443 }
1444 break;
1445 }
1446 break;
1447 case 3:
1448 if (memcmp(message->start, "<=>", 3) == 0) {
1449 type = "<=>";
1450 length = 3;
1451 }
1452 break;
1453 }
1454
1455 break;
1456 }
1457 case PM_CONSTANT_PATH_NODE:
1458 type = "::";
1459 length = 2;
1460 break;
1461 case PM_CONSTANT_READ_NODE:
1462 type = "a constant";
1463 length = 10;
1464 break;
1465 case PM_DEFINED_NODE:
1466 type = "defined?";
1467 length = 8;
1468 break;
1469 case PM_FALSE_NODE:
1470 type = "false";
1471 length = 5;
1472 break;
1473 case PM_FLOAT_NODE:
1474 case PM_IMAGINARY_NODE:
1475 case PM_INTEGER_NODE:
1476 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1477 case PM_INTERPOLATED_STRING_NODE:
1478 case PM_RATIONAL_NODE:
1479 case PM_REGULAR_EXPRESSION_NODE:
1480 case PM_SOURCE_ENCODING_NODE:
1481 case PM_SOURCE_FILE_NODE:
1482 case PM_SOURCE_LINE_NODE:
1483 case PM_STRING_NODE:
1484 case PM_SYMBOL_NODE:
1485 type = "a literal";
1486 length = 9;
1487 break;
1488 case PM_NIL_NODE:
1489 type = "nil";
1490 length = 3;
1491 break;
1492 case PM_RANGE_NODE: {
1493 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1494
1495 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1496 type = "...";
1497 length = 3;
1498 } else {
1499 type = "..";
1500 length = 2;
1501 }
1502
1503 break;
1504 }
1505 case PM_SELF_NODE:
1506 type = "self";
1507 length = 4;
1508 break;
1509 case PM_TRUE_NODE:
1510 type = "true";
1511 length = 4;
1512 break;
1513 default:
1514 break;
1515 }
1516
1517 if (type != NULL) {
1518 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1519 }
1520}
1521
1526static void
1527pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1528 assert(node->body.size > 0);
1529 const size_t size = node->body.size - (last_value ? 1 : 0);
1530 for (size_t index = 0; index < size; index++) {
1531 pm_void_statement_check(parser, node->body.nodes[index]);
1532 }
1533}
1534
/**
 * The context in which pm_conditional_predicate inspects a node. It selects
 * the wording of the literal-in-condition warnings.
 */
typedef enum {
    /** The node is used as a condition; warnings say "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** The node is an endpoint of a flip-flop; warnings say "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1545
1549static void
1550pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1551 switch (type) {
1552 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1553 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1554 break;
1555 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1556 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1557 break;
1558 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1559 break;
1560 }
1561}
1562
1567static bool
1568pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1569 switch (PM_NODE_TYPE(node)) {
1570 case PM_ARRAY_NODE: {
1571 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1572
1573 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1574 for (size_t index = 0; index < cast->elements.size; index++) {
1575 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1576 }
1577
1578 return true;
1579 }
1580 case PM_HASH_NODE: {
1581 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1582
1583 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1584 for (size_t index = 0; index < cast->elements.size; index++) {
1585 const pm_node_t *element = cast->elements.nodes[index];
1586 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1587
1588 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1589 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1590 }
1591
1592 return true;
1593 }
1594 case PM_FALSE_NODE:
1595 case PM_FLOAT_NODE:
1596 case PM_IMAGINARY_NODE:
1597 case PM_INTEGER_NODE:
1598 case PM_NIL_NODE:
1599 case PM_RATIONAL_NODE:
1600 case PM_REGULAR_EXPRESSION_NODE:
1601 case PM_SOURCE_ENCODING_NODE:
1602 case PM_SOURCE_FILE_NODE:
1603 case PM_SOURCE_LINE_NODE:
1604 case PM_STRING_NODE:
1605 case PM_SYMBOL_NODE:
1606 case PM_TRUE_NODE:
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
1617static PRISM_INLINE void
1618pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1619 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1620 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1621 }
1622}
1623
1636static void
1637pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1638 switch (PM_NODE_TYPE(node)) {
1639 case PM_AND_NODE: {
1640 pm_and_node_t *cast = (pm_and_node_t *) node;
1641 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1642 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1643 break;
1644 }
1645 case PM_OR_NODE: {
1646 pm_or_node_t *cast = (pm_or_node_t *) node;
1647 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1648 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1649 break;
1650 }
1651 case PM_PARENTHESES_NODE: {
1653
1654 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1655 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1656 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1657 }
1658
1659 break;
1660 }
1661 case PM_BEGIN_NODE: {
1662 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1663 if (cast->statements != NULL) {
1664 pm_statements_node_t *statements = cast->statements;
1665 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1666 }
1667 break;
1668 }
1669 case PM_RANGE_NODE: {
1670 pm_range_node_t *cast = (pm_range_node_t *) node;
1671
1672 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1673 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1674
1675 // Here we change the range node into a flip flop node. We can do
1676 // this since the nodes are exactly the same except for the type.
1677 // We're only asserting against the size when we should probably
1678 // assert against the entire layout, but we'll assume tests will
1679 // catch this.
1680 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1681 node->type = PM_FLIP_FLOP_NODE;
1682
1683 break;
1684 }
1685 case PM_REGULAR_EXPRESSION_NODE:
1686 // Here we change the regular expression node into a match last line
1687 // node. We can do this since the nodes are exactly the same except
1688 // for the type.
1690 node->type = PM_MATCH_LAST_LINE_NODE;
1691
1692 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1693 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1694 }
1695
1696 break;
1697 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1698 // Here we change the interpolated regular expression node into an
1699 // interpolated match last line node. We can do this since the nodes
1700 // are exactly the same except for the type.
1702 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1703
1704 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1705 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1706 }
1707
1708 break;
1709 case PM_INTEGER_NODE:
1710 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1711 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1712 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1713 }
1714 } else {
1715 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1716 }
1717 break;
1718 case PM_STRING_NODE:
1719 case PM_SOURCE_FILE_NODE:
1720 case PM_INTERPOLATED_STRING_NODE:
1721 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1722 break;
1723 case PM_SYMBOL_NODE:
1724 case PM_INTERPOLATED_SYMBOL_NODE:
1725 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1726 break;
1727 case PM_SOURCE_LINE_NODE:
1728 case PM_SOURCE_ENCODING_NODE:
1729 case PM_FLOAT_NODE:
1730 case PM_RATIONAL_NODE:
1731 case PM_IMAGINARY_NODE:
1732 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1733 break;
1734 case PM_CLASS_VARIABLE_WRITE_NODE:
1735 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1736 break;
1737 case PM_CONSTANT_WRITE_NODE:
1738 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1739 break;
1740 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1741 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1742 break;
1743 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1744 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1745 break;
1746 case PM_LOCAL_VARIABLE_WRITE_NODE:
1747 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1748 break;
1749 case PM_MULTI_WRITE_NODE:
1750 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1751 break;
1752 default:
1753 break;
1754 }
1755}
1756
1779
1783static PRISM_INLINE const pm_location_t *
1784pm_arguments_end(pm_arguments_t *arguments) {
1785 if (arguments->block != NULL) {
1786 uint32_t end = PM_NODE_END(arguments->block);
1787
1788 if (arguments->closing_loc.length > 0) {
1789 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1790 if (arguments_end > end) {
1791 return &arguments->closing_loc;
1792 }
1793 }
1794 return &arguments->block->location;
1795 }
1796 if (arguments->closing_loc.length > 0) {
1797 return &arguments->closing_loc;
1798 }
1799 if (arguments->arguments != NULL) {
1800 return &arguments->arguments->base.location;
1801 }
1802 if (arguments->opening_loc.length > 0) {
1803 return &arguments->opening_loc;
1804 }
1805 return NULL;
1806}
1807
1812static void
1813pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1814 // First, check that we have arguments and that we don't have a closing
1815 // location for them.
1816 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1817 return;
1818 }
1819
1820 // Next, check that we don't have a single parentheses argument. This would
1821 // look like:
1822 //
1823 // foo (1) {}
1824 //
1825 // In this case, it's actually okay for the block to be attached to the
1826 // call, even though it looks like it's attached to the argument.
1827 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1828 return;
1829 }
1830
1831 // If we didn't hit a case before this check, then at this point we need to
1832 // add a syntax error.
1833 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1834}
1835
1836/******************************************************************************/
1837/* Basic character checks */
1838/******************************************************************************/
1839
1846static PRISM_INLINE size_t
1847char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1848 if (n <= 0) return 0;
1849
1850 if (parser->encoding_changed) {
1851 size_t width;
1852
1853 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1854 return width;
1855 } else if (*b == '_') {
1856 return 1;
1857 } else if (*b >= 0x80) {
1858 return parser->encoding->char_width(b, n);
1859 } else {
1860 return 0;
1861 }
1862 } else if (*b < 0x80) {
1863 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1864 } else {
1865 return pm_encoding_utf_8_char_width(b, n);
1866 }
1867}
1868
1873static PRISM_INLINE size_t
1874char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1875 if (n <= 0) {
1876 return 0;
1877 } else if (*b < 0x80) {
1878 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1879 } else {
1880 return pm_encoding_utf_8_char_width(b, n);
1881 }
1882}
1883
1897#if defined(PRISM_HAS_NEON)
1898#include <arm_neon.h>
1899
1900static PRISM_INLINE size_t
1901scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
1902 const uint8_t *cursor = start;
1903
1904 // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
1905 // Each high nibble is assigned a unique bit; the low nibble table
1906 // contains the OR of bits for all high nibbles that have an
1907 // identifier character at that low nibble position. A byte is an
1908 // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
1909 static const uint8_t low_lut_data[16] = {
1910 0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
1911 0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
1912 };
1913 static const uint8_t high_lut_data[16] = {
1914 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
1915 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1916 };
1917 const uint8x16_t low_lut = vld1q_u8(low_lut_data);
1918 const uint8x16_t high_lut = vld1q_u8(high_lut_data);
1919 const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
1920
1921 while (cursor + 16 <= end) {
1922 uint8x16_t v = vld1q_u8(cursor);
1923
1924 uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
1925 uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
1926 uint8x16_t ident = vandq_u8(lo_class, hi_class);
1927
1928 // Fast check: if the per-byte minimum is nonzero, every byte matched.
1929 if (vminvq_u8(ident) != 0) {
1930 cursor += 16;
1931 continue;
1932 }
1933
1934 // Find the first non-identifier byte (zero in ident).
1935 uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
1936 uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);
1937
1938 if (lo != 0) {
1939 cursor += pm_ctzll(lo) / 8;
1940 } else {
1941 uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
1942 cursor += 8 + pm_ctzll(hi) / 8;
1943 }
1944
1945 return (size_t) (cursor - start);
1946 }
1947
1948 return (size_t) (cursor - start);
1949}
1950
1951#elif defined(PRISM_HAS_SSSE3)
1952#include <tmmintrin.h>
1953
1954static PRISM_INLINE size_t
1955scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
1956 const uint8_t *cursor = start;
1957
1958 while (cursor + 16 <= end) {
1959 __m128i v = _mm_loadu_si128((const __m128i *) cursor);
1960 __m128i zero = _mm_setzero_si128();
1961
1962 // Unsigned range check via saturating subtraction:
1963 // byte >= lo ⟺ saturate(lo - byte) == 0
1964 // byte <= hi ⟺ saturate(byte - hi) == 0
1965
1966 // Fold case: OR with 0x20 maps A-Z to a-z.
1967 __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
1968 __m128i letter = _mm_and_si128(
1969 _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
1970 _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
1971
1972 __m128i digit = _mm_and_si128(
1973 _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
1974 _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
1975
1976 __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));
1977
1978 __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
1979 int mask = _mm_movemask_epi8(ident);
1980
1981 if (mask == 0xFFFF) {
1982 cursor += 16;
1983 continue;
1984 }
1985
1986 cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
1987 return (size_t) (cursor - start);
1988 }
1989
1990 return (size_t) (cursor - start);
1991}
1992
1993// The SWAR path uses pm_ctzll to find the first non-matching byte within a
1994// word, which only yields the correct byte index on little-endian targets.
1995// We gate on a positive little-endian check so that unknown-endianness
1996// platforms safely fall through to the no-op fallback.
1997#elif defined(PRISM_HAS_SWAR)
1998
2008static PRISM_INLINE size_t
2009scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
2010 static const uint64_t ones = 0x0101010101010101ULL;
2011 static const uint64_t highs = 0x8080808080808080ULL;
2012 const uint8_t *cursor = start;
2013
2014 while (cursor + 8 <= end) {
2015 uint64_t word;
2016 memcpy(&word, cursor, 8);
2017
2018 // Bail on any non-ASCII byte.
2019 if (word & highs) break;
2020
2021 uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
2022
2023 // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
2024 // then check the lowercase range once. A-Z maps to a-z; the
2025 // only non-letter byte that could alias into [0x61,0x7A] is one
2026 // whose original value was in [0x41,0x5A] — which is exactly
2027 // the uppercase letters we want to match.
2028 uint64_t lowered = word | (ones * 0x20);
2029 uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
2030
2031 // Standard SWAR "has zero byte" idiom on (word XOR 0x5F) to find
2032 // bytes equal to underscore. Safe from cross-byte borrows because
2033 // the ASCII guard above ensures all bytes are < 0x80.
2034 uint64_t xor_us = word ^ (ones * 0x5F);
2035 uint64_t underscore = (xor_us - ones) & ~xor_us & highs;
2036
2037 uint64_t ident = digit | letter | underscore;
2038
2039 if (ident == highs) {
2040 cursor += 8;
2041 continue;
2042 }
2043
2044 // Find the first non-identifier byte. On little-endian the first
2045 // byte sits in the least-significant position.
2046 uint64_t not_ident = ~ident & highs;
2047 cursor += pm_ctzll(not_ident) / 8;
2048 return (size_t) (cursor - start);
2049 }
2050
2051 return (size_t) (cursor - start);
2052}
2053
2054#else
2055
// No-op fallback for big-endian or other unsupported platforms. It ignores
// both arguments and always reports that zero bytes were consumed, so the
// caller's byte-at-a-time loop handles everything.
#define scan_identifier_ascii(start, end) ((size_t) 0)
2059
2060#endif
2061
2067static PRISM_INLINE size_t
2068char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
2069 if (n <= 0) {
2070 return 0;
2071 } else if (parser->encoding_changed) {
2072 size_t width;
2073
2074 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
2075 return width;
2076 } else if (*b == '_') {
2077 return 1;
2078 } else if (*b >= 0x80) {
2079 return parser->encoding->char_width(b, n);
2080 } else {
2081 return 0;
2082 }
2083 } else {
2084 return char_is_identifier_utf8(b, n);
2085 }
2086}
2087
// A bitmap of the punctuation characters that are allowed in global names. It
// is used to quickly check whether the character after a $ is valid there.
//
// The printable range is split into 32-character words: word 0 covers 0x20 to
// 0x3f, word 1 covers 0x40 to 0x5f and word 2 covers 0x60 to 0x7f. Within a
// word, a character owns the bit at position (c % 32).
//
// BIT(c, idx) yields the bit of character c if it belongs to word idx and 0
// otherwise. PUNCT(idx) ORs together the bits of every allowed character for
// word idx.
#define BIT(c, idx) (((c) / 32 - 1 == (idx)) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | \
    BIT('*', idx) | \
    BIT('$', idx) | \
    BIT('?', idx) | \
    BIT('!', idx) | \
    BIT('@', idx) | \
    BIT('/', idx) | \
    BIT('\\', idx) | \
    BIT(';', idx) | \
    BIT(',', idx) | \
    BIT('.', idx) | \
    BIT('=', idx) | \
    BIT(':', idx) | \
    BIT('<', idx) | \
    BIT('>', idx) | \
    BIT('\"', idx) | \
    BIT('&', idx) | \
    BIT('`', idx) | \
    BIT('\'', idx) | \
    BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT(0),
    PUNCT(1),
    PUNCT(2)
};

#undef BIT
#undef PUNCT
2104
2105static PRISM_INLINE bool
2106char_is_global_name_punctuation(const uint8_t b) {
2107 const unsigned int i = (const unsigned int) b;
2108 if (i <= 0x20 || 0x7e < i) return false;
2109
2110 return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
2111}
2112
2113static PRISM_INLINE bool
2114token_is_setter_name(pm_token_t *token) {
2115 return (
2116 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
2117 ((token->type == PM_TOKEN_IDENTIFIER) &&
2118 (token->end - token->start >= 2) &&
2119 (token->end[-1] == '='))
2120 );
2121}
2122
/**
 * Return true if the first `length` bytes at `source` spell one of the
 * keywords listed below.
 *
 * Candidates are narrowed first by length and then by first byte, so at most
 * two comparisons are made per call.
 *
 * @param source Pointer to the name. Only read when length selects a case.
 * @param length Number of bytes in the name.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define PM_KEYWORD_P(keyword) (memcmp(source, keyword, length) == 0)

    switch (length) {
        case 2:
            switch (source[0]) {
                case 'd': return PM_KEYWORD_P("do");
                case 'i': return PM_KEYWORD_P("if") || PM_KEYWORD_P("in");
                case 'o': return PM_KEYWORD_P("or");
                default: return false;
            }
        case 3:
            switch (source[0]) {
                case 'a': return PM_KEYWORD_P("and");
                case 'd': return PM_KEYWORD_P("def");
                case 'e': return PM_KEYWORD_P("end");
                case 'f': return PM_KEYWORD_P("for");
                case 'n': return PM_KEYWORD_P("nil") || PM_KEYWORD_P("not");
                default: return false;
            }
        case 4:
            switch (source[0]) {
                case 'c': return PM_KEYWORD_P("case");
                case 'e': return PM_KEYWORD_P("else");
                case 'n': return PM_KEYWORD_P("next");
                case 'r': return PM_KEYWORD_P("redo");
                case 's': return PM_KEYWORD_P("self");
                case 't': return PM_KEYWORD_P("then") || PM_KEYWORD_P("true");
                case 'w': return PM_KEYWORD_P("when");
                default: return false;
            }
        case 5:
            switch (source[0]) {
                case 'a': return PM_KEYWORD_P("alias");
                case 'b': return PM_KEYWORD_P("begin") || PM_KEYWORD_P("break");
                case 'c': return PM_KEYWORD_P("class");
                case 'e': return PM_KEYWORD_P("elsif");
                case 'f': return PM_KEYWORD_P("false");
                case 'r': return PM_KEYWORD_P("retry");
                case 's': return PM_KEYWORD_P("super");
                case 'u': return PM_KEYWORD_P("undef") || PM_KEYWORD_P("until");
                case 'w': return PM_KEYWORD_P("while");
                case 'y': return PM_KEYWORD_P("yield");
                default: return false;
            }
        case 6:
            switch (source[0]) {
                case 'e': return PM_KEYWORD_P("ensure");
                case 'm': return PM_KEYWORD_P("module");
                case 'r': return PM_KEYWORD_P("rescue") || PM_KEYWORD_P("return");
                case 'u': return PM_KEYWORD_P("unless");
                default: return false;
            }
        case 8:
            return PM_KEYWORD_P("__LINE__") || PM_KEYWORD_P("__FILE__");
        case 12:
            return PM_KEYWORD_P("__ENCODING__");
        default:
            return false;
    }

#undef PM_KEYWORD_P
}
2193
2194/******************************************************************************/
2195/* Node flag handling functions */
2196/******************************************************************************/
2197
2201static PRISM_INLINE void
2202pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
2203 node->flags |= flag;
2204}
2205
2209static PRISM_INLINE void
2210pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
2211 node->flags &= (pm_node_flags_t) ~flag;
2212}
2213
2217static PRISM_INLINE void
2218pm_node_flag_set_repeated_parameter(pm_node_t *node) {
2219 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
2220 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
2221 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
2222 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
2223 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
2224 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
2225 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
2226 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
2227
2228 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
2229}
2230
2231/******************************************************************************/
2232/* Node creation functions */
2233/******************************************************************************/
2234
2240#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
2241
2245static PRISM_INLINE pm_node_flags_t
2246pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
2247 pm_node_flags_t flags = 0;
2248
2249 if (closing->type == PM_TOKEN_REGEXP_END) {
2250 pm_buffer_t unknown_flags = { 0 };
2251
2252 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
2253 switch (*flag) {
2254 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
2255 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
2256 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
2257 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
2258
2259 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
2260 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
2261 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
2262 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
2263
2264 default: pm_buffer_append_byte(&unknown_flags, *flag);
2265 }
2266 }
2267
2268 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
2269 if (unknown_flags_length != 0) {
2270 const char *word = unknown_flags_length >= 2 ? "options" : "option";
2271 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
2272 }
2273 pm_buffer_cleanup(&unknown_flags);
2274 }
2275
2276 return flags;
2277}
2278
2279#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
2280
2281static pm_statements_node_t *
2282pm_statements_node_create(pm_parser_t *parser);
2283
2284static void
2285pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2286
2287static size_t
2288pm_statements_node_body_length(pm_statements_node_t *node);
2289
2294static PRISM_INLINE void
2295pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2296 if (integer->values != NULL) {
2297 size_t byte_size = integer->length * sizeof(uint32_t);
2298 uint32_t *old_values = integer->values;
2299 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
2300 xfree(old_values);
2301 }
2302}
2303
2307static pm_error_recovery_node_t *
2308pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2309 return pm_error_recovery_node_new(
2310 parser->arena,
2311 ++parser->node_id,
2312 0,
2313 ((pm_location_t) { .start = start, .length = length }),
2314 NULL
2315 );
2316}
2317
2321static pm_error_recovery_node_t *
2322pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
2323 return pm_error_recovery_node_new(
2324 parser->arena,
2325 ++parser->node_id,
2326 0,
2327 unexpected->location,
2328 unexpected
2329 );
2330}
2331
2335static pm_alias_global_variable_node_t *
2336pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2337 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2338
2339 return pm_alias_global_variable_node_new(
2340 parser->arena,
2341 ++parser->node_id,
2342 0,
2343 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2344 new_name,
2345 old_name,
2346 TOK2LOC(parser, keyword)
2347 );
2348}
2349
2353static pm_alias_method_node_t *
2354pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2355 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2356
2357 return pm_alias_method_node_new(
2358 parser->arena,
2359 ++parser->node_id,
2360 0,
2361 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2362 new_name,
2363 old_name,
2364 TOK2LOC(parser, keyword)
2365 );
2366}
2367
2371static pm_alternation_pattern_node_t *
2372pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2373 return pm_alternation_pattern_node_new(
2374 parser->arena,
2375 ++parser->node_id,
2376 0,
2377 PM_LOCATION_INIT_NODES(left, right),
2378 left,
2379 right,
2380 TOK2LOC(parser, operator)
2381 );
2382}
2383
2387static pm_and_node_t *
2388pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2389 pm_assert_value_expression(parser, left);
2390
2391 return pm_and_node_new(
2392 parser->arena,
2393 ++parser->node_id,
2394 0,
2395 PM_LOCATION_INIT_NODES(left, right),
2396 left,
2397 right,
2398 TOK2LOC(parser, operator)
2399 );
2400}
2401
2405static pm_arguments_node_t *
2406pm_arguments_node_create(pm_parser_t *parser) {
2407 return pm_arguments_node_new(
2408 parser->arena,
2409 ++parser->node_id,
2410 0,
2411 PM_LOCATION_INIT_UNSET,
2412 ((pm_node_list_t) { 0 })
2413 );
2414}
2415
2419static size_t
2420pm_arguments_node_size(pm_arguments_node_t *node) {
2421 return node->arguments.size;
2422}
2423
2427static void
2428pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
2429 if (pm_arguments_node_size(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, argument);
2431 }
2432
2433 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2434 PM_NODE_LENGTH_SET_NODE(node, argument);
2435 }
2436
2437 pm_node_list_append(arena, &node->arguments, argument);
2438
2439 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2440 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2441 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2442 } else {
2443 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2444 }
2445 }
2446}
2447
2451static pm_array_node_t *
2452pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2453 if (opening == NULL) {
2454 return pm_array_node_new(
2455 parser->arena,
2456 ++parser->node_id,
2457 PM_NODE_FLAG_STATIC_LITERAL,
2458 PM_LOCATION_INIT_UNSET,
2459 ((pm_node_list_t) { 0 }),
2460 ((pm_location_t) { 0 }),
2461 ((pm_location_t) { 0 })
2462 );
2463 } else {
2464 return pm_array_node_new(
2465 parser->arena,
2466 ++parser->node_id,
2467 PM_NODE_FLAG_STATIC_LITERAL,
2468 PM_LOCATION_INIT_TOKEN(parser, opening),
2469 ((pm_node_list_t) { 0 }),
2470 TOK2LOC(parser, opening),
2471 TOK2LOC(parser, opening)
2472 );
2473 }
2474}
2475
2479static PRISM_INLINE void
2480pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
2481 if (!node->elements.size && !node->opening_loc.length) {
2482 PM_NODE_START_SET_NODE(node, element);
2483 }
2484
2485 pm_node_list_append(arena, &node->elements, element);
2486 PM_NODE_LENGTH_SET_NODE(node, element);
2487
2488 // If the element is not a static literal, then the array is not a static
2489 // literal. Turn that flag off.
2490 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2491 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2492 }
2493
2494 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2495 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2496 }
2497}
2498
2502static void
2503pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2504 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2505 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2506 node->closing_loc = TOK2LOC(parser, closing);
2507}
2508
2513static pm_array_pattern_node_t *
2514pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2515 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2516 parser->arena,
2517 ++parser->node_id,
2518 0,
2519 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2520 NULL,
2521 ((pm_node_list_t) { 0 }),
2522 NULL,
2523 ((pm_node_list_t) { 0 }),
2524 ((pm_location_t) { 0 }),
2525 ((pm_location_t) { 0 })
2526 );
2527
2528 // For now we're going to just copy over each pointer manually. This could be
2529 // much more efficient, as we could instead resize the node list.
2530 bool found_rest = false;
2531 pm_node_t *child;
2532
2533 PM_NODE_LIST_FOREACH(nodes, index, child) {
2534 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2535 node->rest = child;
2536 found_rest = true;
2537 } else if (found_rest) {
2538 pm_node_list_append(parser->arena, &node->posts, child);
2539 } else {
2540 pm_node_list_append(parser->arena, &node->requireds, child);
2541 }
2542 }
2543
2544 return node;
2545}
2546
2550static pm_array_pattern_node_t *
2551pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2552 return pm_array_pattern_node_new(
2553 parser->arena,
2554 ++parser->node_id,
2555 0,
2556 PM_LOCATION_INIT_NODE(rest),
2557 NULL,
2558 ((pm_node_list_t) { 0 }),
2559 rest,
2560 ((pm_node_list_t) { 0 }),
2561 ((pm_location_t) { 0 }),
2562 ((pm_location_t) { 0 })
2563 );
2564}
2565
2570static pm_array_pattern_node_t *
2571pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2572 return pm_array_pattern_node_new(
2573 parser->arena,
2574 ++parser->node_id,
2575 0,
2576 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2577 constant,
2578 ((pm_node_list_t) { 0 }),
2579 NULL,
2580 ((pm_node_list_t) { 0 }),
2581 TOK2LOC(parser, opening),
2582 TOK2LOC(parser, closing)
2583 );
2584}
2585
2590static pm_array_pattern_node_t *
2591pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2592 return pm_array_pattern_node_new(
2593 parser->arena,
2594 ++parser->node_id,
2595 0,
2596 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2597 NULL,
2598 ((pm_node_list_t) { 0 }),
2599 NULL,
2600 ((pm_node_list_t) { 0 }),
2601 TOK2LOC(parser, opening),
2602 TOK2LOC(parser, closing)
2603 );
2604}
2605
2606static PRISM_INLINE void
2607pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2608 pm_node_list_append(arena, &node->requireds, inner);
2609}
2610
2614static pm_assoc_node_t *
2615pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2616 uint32_t end;
2617
2618 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2619 end = PM_NODE_END(value);
2620 } else if (operator != NULL) {
2621 end = PM_TOKEN_END(parser, operator);
2622 } else {
2623 end = PM_NODE_END(key);
2624 }
2625
2626 // Hash string keys will be frozen, so we can mark them as frozen here so
2627 // that the compiler picks them up and also when we check for static literal
2628 // on the keys it gets factored in.
2629 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2630 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2631 }
2632
2633 // If the key and value of this assoc node are both static literals, then
2634 // we can mark this node as a static literal.
2635 pm_node_flags_t flags = 0;
2636 if (
2637 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2638 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2639 ) {
2640 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2641 }
2642
2643 return pm_assoc_node_new(
2644 parser->arena,
2645 ++parser->node_id,
2646 flags,
2647 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2648 key,
2649 value,
2650 NTOK2LOC(parser, operator)
2651 );
2652}
2653
2657static pm_assoc_splat_node_t *
2658pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2659 assert(operator->type == PM_TOKEN_USTAR_STAR);
2660
2661 return pm_assoc_splat_node_new(
2662 parser->arena,
2663 ++parser->node_id,
2664 0,
2665 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2666 value,
2667 TOK2LOC(parser, operator)
2668 );
2669}
2670
2674static pm_back_reference_read_node_t *
2675pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2676 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2677
2678 return pm_back_reference_read_node_new(
2679 parser->arena,
2680 ++parser->node_id,
2681 0,
2682 PM_LOCATION_INIT_TOKEN(parser, name),
2683 pm_parser_constant_id_token(parser, name)
2684 );
2685}
2686
2690static pm_begin_node_t *
2691pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2692 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2693 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2694
2695 return pm_begin_node_new(
2696 parser->arena,
2697 ++parser->node_id,
2698 0,
2699 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2700 NTOK2LOC(parser, begin_keyword),
2701 statements,
2702 NULL,
2703 NULL,
2704 NULL,
2705 ((pm_location_t) { 0 })
2706 );
2707}
2708
2712static void
2713pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2714 if (node->begin_keyword_loc.length == 0) {
2715 PM_NODE_START_SET_NODE(node, rescue_clause);
2716 }
2717 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2718 node->rescue_clause = rescue_clause;
2719}
2720
2724static void
2725pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2726 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2727 PM_NODE_START_SET_NODE(node, else_clause);
2728 }
2729 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2730 node->else_clause = else_clause;
2731}
2732
2736static void
2737pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2738 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2739 PM_NODE_START_SET_NODE(node, ensure_clause);
2740 }
2741 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2742 node->ensure_clause = ensure_clause;
2743}
2744
2748static void
2749pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2750 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2751 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2752 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2753}
2754
2758static pm_block_argument_node_t *
2759pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2760 assert(operator->type == PM_TOKEN_UAMPERSAND);
2761
2762 return pm_block_argument_node_new(
2763 parser->arena,
2764 ++parser->node_id,
2765 0,
2766 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2767 expression,
2768 TOK2LOC(parser, operator)
2769 );
2770}
2771
2775static pm_block_node_t *
2776pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2777 return pm_block_node_new(
2778 parser->arena,
2779 ++parser->node_id,
2780 0,
2781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2782 *locals,
2783 parameters,
2784 body,
2785 TOK2LOC(parser, opening),
2786 TOK2LOC(parser, closing)
2787 );
2788}
2789
2793static pm_block_parameter_node_t *
2794pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2795 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2796
2797 return pm_block_parameter_node_new(
2798 parser->arena,
2799 ++parser->node_id,
2800 0,
2801 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
2802 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2803 NTOK2LOC(parser, name),
2804 TOK2LOC(parser, operator)
2805 );
2806}
2807
2811static pm_block_parameters_node_t *
2812pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2813 uint32_t start;
2814 if (opening != NULL) {
2815 start = PM_TOKEN_START(parser, opening);
2816 } else if (parameters != NULL) {
2817 start = PM_NODE_START(parameters);
2818 } else {
2819 start = 0;
2820 }
2821
2822 uint32_t end;
2823 if (parameters != NULL) {
2824 end = PM_NODE_END(parameters);
2825 } else if (opening != NULL) {
2826 end = PM_TOKEN_END(parser, opening);
2827 } else {
2828 end = 0;
2829 }
2830
2831 return pm_block_parameters_node_new(
2832 parser->arena,
2833 ++parser->node_id,
2834 0,
2835 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2836 parameters,
2837 ((pm_node_list_t) { 0 }),
2838 NTOK2LOC(parser, opening),
2839 ((pm_location_t) { 0 })
2840 );
2841}
2842
2846static void
2847pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2848 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2849 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2850 node->closing_loc = TOK2LOC(parser, closing);
2851}
2852
2856static pm_block_local_variable_node_t *
2857pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2858 return pm_block_local_variable_node_new(
2859 parser->arena,
2860 ++parser->node_id,
2861 0,
2862 PM_LOCATION_INIT_TOKEN(parser, name),
2863 pm_parser_constant_id_token(parser, name)
2864 );
2865}
2866
2870static void
2871pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2872 pm_node_list_append(arena, &node->locals, UP(local));
2873
2874 if (PM_NODE_LENGTH(node) == 0) {
2875 PM_NODE_START_SET_NODE(node, local);
2876 }
2877
2878 PM_NODE_LENGTH_SET_NODE(node, local);
2879}
2880
2884static pm_break_node_t *
2885pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2886 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2887
2888 return pm_break_node_new(
2889 parser->arena,
2890 ++parser->node_id,
2891 0,
2892 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2893 arguments,
2894 TOK2LOC(parser, keyword)
2895 );
2896}
2897
2898// There are certain flags that we want to use internally but don't want to
2899// expose because they are not relevant beyond parsing. Therefore we'll define
2900// them here and not define them in config.yml/a header file.
2901static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
2902
2903static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2904static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2905static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2906
2912static pm_call_node_t *
2913pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2914 return pm_call_node_new(
2915 parser->arena,
2916 ++parser->node_id,
2917 flags,
2918 PM_LOCATION_INIT_UNSET,
2919 NULL,
2920 ((pm_location_t) { 0 }),
2921 0,
2922 ((pm_location_t) { 0 }),
2923 ((pm_location_t) { 0 }),
2924 NULL,
2925 ((pm_location_t) { 0 }),
2926 ((pm_location_t) { 0 }),
2927 NULL
2928 );
2929}
2930
2935static PRISM_INLINE pm_node_flags_t
2936pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2937 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2938}
2939
2944static pm_call_node_t *
2945pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2946 pm_assert_value_expression(parser, receiver);
2947
2948 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2949 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2950 flags |= PM_CALL_NODE_FLAGS_INDEX;
2951 }
2952
2953 pm_call_node_t *node = pm_call_node_create(parser, flags);
2954
2955 PM_NODE_START_SET_NODE(node, receiver);
2956
2957 const pm_location_t *end = pm_arguments_end(arguments);
2958 assert(end != NULL && "unreachable");
2959 PM_NODE_LENGTH_SET_LOCATION(node, end);
2960
2961 node->receiver = receiver;
2962 node->message_loc.start = arguments->opening_loc.start;
2963 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2964
2965 node->opening_loc = arguments->opening_loc;
2966 node->arguments = arguments->arguments;
2967 node->closing_loc = arguments->closing_loc;
2968 node->block = arguments->block;
2969
2970 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2971 return node;
2972}
2973
2977static pm_call_node_t *
2978pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2979 pm_assert_value_expression(parser, receiver);
2980 pm_assert_value_expression(parser, argument);
2981
2982 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2983
2984 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2985 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2986
2987 node->receiver = receiver;
2988 node->message_loc = TOK2LOC(parser, operator);
2989
2990 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2991 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2992 node->arguments = arguments;
2993
2994 node->name = pm_parser_constant_id_token(parser, operator);
2995 return node;
2996}
2997
2998static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2999
3003static pm_call_node_t *
3004pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
3005 pm_assert_value_expression(parser, receiver);
3006
3007 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3008
3009 PM_NODE_START_SET_NODE(node, receiver);
3010 const pm_location_t *end = pm_arguments_end(arguments);
3011 if (end == NULL) {
3012 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
3013 } else {
3014 PM_NODE_LENGTH_SET_LOCATION(node, end);
3015 }
3016
3017 node->receiver = receiver;
3018 node->call_operator_loc = TOK2LOC(parser, operator);
3019 node->message_loc = TOK2LOC(parser, message);
3020 node->opening_loc = arguments->opening_loc;
3021 node->arguments = arguments->arguments;
3022 node->closing_loc = arguments->closing_loc;
3023 node->block = arguments->block;
3024
3025 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3026 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3027 }
3028
3033 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
3034 return node;
3035}
3036
3040static pm_call_node_t *
3041pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
3042 pm_call_node_t *node = pm_call_node_create(parser, 0);
3043 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
3044
3045 node->receiver = receiver;
3046 node->arguments = arguments;
3047
3048 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
3049 return node;
3050}
3051
3056static pm_call_node_t *
3057pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
3058 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3059
3060 PM_NODE_START_SET_TOKEN(parser, node, message);
3061 const pm_location_t *end = pm_arguments_end(arguments);
3062 assert(end != NULL && "unreachable");
3063 PM_NODE_LENGTH_SET_LOCATION(node, end);
3064
3065 node->message_loc = TOK2LOC(parser, message);
3066 node->opening_loc = arguments->opening_loc;
3067 node->arguments = arguments->arguments;
3068 node->closing_loc = arguments->closing_loc;
3069 node->block = arguments->block;
3070
3071 node->name = pm_parser_constant_id_token(parser, message);
3072 return node;
3073}
3074
3079static pm_call_node_t *
3080pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
3081 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3082
3083 node->base.location = (pm_location_t) { 0 };
3084 node->arguments = arguments;
3085
3086 node->name = name;
3087 return node;
3088}
3089
3093static pm_call_node_t *
3094pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
3095 pm_assert_value_expression(parser, receiver);
3096 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
3097
3098 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
3099
3100 PM_NODE_START_SET_TOKEN(parser, node, message);
3101 if (arguments->closing_loc.length > 0) {
3102 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
3103 } else {
3104 assert(receiver != NULL);
3105 PM_NODE_LENGTH_SET_NODE(node, receiver);
3106 }
3107
3108 node->receiver = receiver;
3109 node->message_loc = TOK2LOC(parser, message);
3110 node->opening_loc = arguments->opening_loc;
3111 node->arguments = arguments->arguments;
3112 node->closing_loc = arguments->closing_loc;
3113
3114 node->name = pm_parser_constant_id_constant(parser, "!", 1);
3115 return node;
3116}
3117
3121static pm_call_node_t *
3122pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
3123 pm_assert_value_expression(parser, receiver);
3124
3125 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3126
3127 PM_NODE_START_SET_NODE(node, receiver);
3128 const pm_location_t *end = pm_arguments_end(arguments);
3129 assert(end != NULL && "unreachable");
3130 PM_NODE_LENGTH_SET_LOCATION(node, end);
3131
3132 node->receiver = receiver;
3133 node->call_operator_loc = TOK2LOC(parser, operator);
3134 node->opening_loc = arguments->opening_loc;
3135 node->arguments = arguments->arguments;
3136 node->closing_loc = arguments->closing_loc;
3137 node->block = arguments->block;
3138
3139 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3140 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3141 }
3142
3143 node->name = pm_parser_constant_id_constant(parser, "call", 4);
3144 return node;
3145}
3146
3150static pm_call_node_t *
3151pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
3152 pm_assert_value_expression(parser, receiver);
3153
3154 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3155
3156 PM_NODE_START_SET_TOKEN(parser, node, operator);
3157 PM_NODE_LENGTH_SET_NODE(node, receiver);
3158
3159 node->receiver = receiver;
3160 node->message_loc = TOK2LOC(parser, operator);
3161
3162 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
3163 return node;
3164}
3165
3170static pm_call_node_t *
3171pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
3172 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3173
3174 node->base.location = TOK2LOC(parser, message);
3175 node->message_loc = TOK2LOC(parser, message);
3176
3177 node->name = pm_parser_constant_id_token(parser, message);
3178 return node;
3179}
3180
3185static PRISM_INLINE bool
3186pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
3187 return (
3188 (node->message_loc.length > 0) &&
3189 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
3190 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
3191 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
3192 (node->opening_loc.length == 0) &&
3193 (node->arguments == NULL) &&
3194 (node->block == NULL)
3195 );
3196}
3197
3201static void
3202pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
3203 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
3204
3205 if (write_constant->length > 0) {
3206 size_t length = write_constant->length - 1;
3207
3208 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
3209 memcpy(memory, write_constant->start, length);
3210
3211 *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
3212 } else {
3213 // We can get here if the message was missing because of a syntax error.
3214 *read_name = pm_parser_constant_id_constant(parser, "", 0);
3215 }
3216}
3217
3221static pm_call_and_write_node_t *
3222pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3223 assert(target->block == NULL);
3224 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3225
3226 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
3227 parser->arena,
3228 ++parser->node_id,
3229 FL(target),
3230 PM_LOCATION_INIT_NODES(target, value),
3231 target->receiver,
3232 target->call_operator_loc,
3233 target->message_loc,
3234 0,
3235 target->name,
3236 TOK2LOC(parser, operator),
3237 value
3238 );
3239
3240 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3241
3242 // The target is no longer necessary because we've reused its children.
3243 // It is arena-allocated so no explicit free is needed.
3244
3245 return node;
3246}
3247
3252static void
3253pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
3254 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
3255 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
3256 pm_node_t *node;
3257 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
3258 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
3259 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
3260 break;
3261 }
3262 }
3263 }
3264
3265 if (block != NULL) {
3266 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3267 }
3268 }
3269}
3270
3274static pm_index_and_write_node_t *
3275pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3276 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3277
3278 pm_index_arguments_check(parser, target->arguments, target->block);
3279
3280 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3281
3282 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
3283 parser->arena,
3284 ++parser->node_id,
3285 FL(target),
3286 PM_LOCATION_INIT_NODES(target, value),
3287 target->receiver,
3288 target->call_operator_loc,
3289 target->opening_loc,
3290 target->arguments,
3291 target->closing_loc,
3292 (pm_block_argument_node_t *) target->block,
3293 TOK2LOC(parser, operator),
3294 value
3295 );
3296
3297 // The target is no longer necessary because we've reused its children.
3298 // It is arena-allocated so no explicit free is needed.
3299
3300 return node;
3301}
3302
3306static pm_call_operator_write_node_t *
3307pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(target->block == NULL);
3309
3310 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3311 parser->arena,
3312 ++parser->node_id,
3313 FL(target),
3314 PM_LOCATION_INIT_NODES(target, value),
3315 target->receiver,
3316 target->call_operator_loc,
3317 target->message_loc,
3318 0,
3319 target->name,
3320 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3321 TOK2LOC(parser, operator),
3322 value
3323 );
3324
3325 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3326
3327 // The target is no longer necessary because we've reused its children.
3328 // It is arena-allocated so no explicit free is needed.
3329
3330 return node;
3331}
3332
3336static pm_index_operator_write_node_t *
3337pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3338 pm_index_arguments_check(parser, target->arguments, target->block);
3339
3340 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3341
3342 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3343 parser->arena,
3344 ++parser->node_id,
3345 FL(target),
3346 PM_LOCATION_INIT_NODES(target, value),
3347 target->receiver,
3348 target->call_operator_loc,
3349 target->opening_loc,
3350 target->arguments,
3351 target->closing_loc,
3352 (pm_block_argument_node_t *) target->block,
3353 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3354 TOK2LOC(parser, operator),
3355 value
3356 );
3357
3358 // The target is no longer necessary because we've reused its children.
3359 // It is arena-allocated so no explicit free is needed.
3360
3361 return node;
3362}
3363
3367static pm_call_or_write_node_t *
3368pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3369 assert(target->block == NULL);
3370 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3371
3372 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3373 parser->arena,
3374 ++parser->node_id,
3375 FL(target),
3376 PM_LOCATION_INIT_NODES(target, value),
3377 target->receiver,
3378 target->call_operator_loc,
3379 target->message_loc,
3380 0,
3381 target->name,
3382 TOK2LOC(parser, operator),
3383 value
3384 );
3385
3386 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3387
3388 // The target is no longer necessary because we've reused its children.
3389 // It is arena-allocated so no explicit free is needed.
3390
3391 return node;
3392}
3393
3397static pm_index_or_write_node_t *
3398pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3399 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3400
3401 pm_index_arguments_check(parser, target->arguments, target->block);
3402
3403 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3404
3405 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3406 parser->arena,
3407 ++parser->node_id,
3408 FL(target),
3409 PM_LOCATION_INIT_NODES(target, value),
3410 target->receiver,
3411 target->call_operator_loc,
3412 target->opening_loc,
3413 target->arguments,
3414 target->closing_loc,
3415 (pm_block_argument_node_t *) target->block,
3416 TOK2LOC(parser, operator),
3417 value
3418 );
3419
3420 // The target is no longer necessary because we've reused its children.
3421 // It is arena-allocated so no explicit free is needed.
3422
3423 return node;
3424}
3425
3430static pm_call_target_node_t *
3431pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3432 pm_call_target_node_t *node = pm_call_target_node_new(
3433 parser->arena,
3434 ++parser->node_id,
3435 FL(target),
3436 PM_LOCATION_INIT_NODE(target),
3437 target->receiver,
3438 target->call_operator_loc,
3439 target->name,
3440 target->message_loc
3441 );
3442
3443 /* It is possible to get here where we have parsed an invalid syntax tree
3444 * where the call operator was not present. In that case we will have a
3445 * problem because it is a required location. In this case we need to fill
3446 * it in with a fake location so that the syntax tree remains valid. */
3447 if (node->call_operator_loc.length == 0) {
3448 node->call_operator_loc = target->base.location;
3449 }
3450
3451 // The target is no longer necessary because we've reused its children.
3452 // It is arena-allocated so no explicit free is needed.
3453
3454 return node;
3455}
3456
3461static pm_index_target_node_t *
3462pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3463 pm_index_arguments_check(parser, target->arguments, target->block);
3464 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3465
3466 pm_index_target_node_t *node = pm_index_target_node_new(
3467 parser->arena,
3468 ++parser->node_id,
3469 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3470 PM_LOCATION_INIT_NODE(target),
3471 target->receiver,
3472 target->opening_loc,
3473 target->arguments,
3474 target->closing_loc,
3475 (pm_block_argument_node_t *) target->block
3476 );
3477
3478 // The target is no longer necessary because we've reused its children.
3479 // It is arena-allocated so no explicit free is needed.
3480
3481 return node;
3482}
3483
3487static pm_capture_pattern_node_t *
3488pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3489 return pm_capture_pattern_node_new(
3490 parser->arena,
3491 ++parser->node_id,
3492 0,
3493 PM_LOCATION_INIT_NODES(value, target),
3494 value,
3495 target,
3496 TOK2LOC(parser, operator)
3497 );
3498}
3499
3503static pm_case_node_t *
3504pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3505 return pm_case_node_new(
3506 parser->arena,
3507 ++parser->node_id,
3508 0,
3509 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3510 predicate,
3511 ((pm_node_list_t) { 0 }),
3512 NULL,
3513 TOK2LOC(parser, case_keyword),
3514 NTOK2LOC(parser, end_keyword)
3515 );
3516}
3517
3521static void
3522pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3523 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3524
3525 pm_node_list_append(arena, &node->conditions, condition);
3526 PM_NODE_LENGTH_SET_NODE(node, condition);
3527}
3528
3532static void
3533pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3534 node->else_clause = else_clause;
3535 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3536}
3537
3541static void
3542pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3543 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3544 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3545}
3546
3550static pm_case_match_node_t *
3551pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3552 return pm_case_match_node_new(
3553 parser->arena,
3554 ++parser->node_id,
3555 0,
3556 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3557 predicate,
3558 ((pm_node_list_t) { 0 }),
3559 NULL,
3560 TOK2LOC(parser, case_keyword),
3561 ((pm_location_t) { 0 })
3562 );
3563}
3564
3568static void
3569pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3570 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3571
3572 pm_node_list_append(arena, &node->conditions, condition);
3573 PM_NODE_LENGTH_SET_NODE(node, condition);
3574}
3575
3579static void
3580pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3581 node->else_clause = else_clause;
3582 PM_NODE_LENGTH_SET_NODE(node, else_clause);
3583}
3584
3588static void
3589pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3590 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3591 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3592}
3593
3597static pm_class_node_t *
3598pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3599 return pm_class_node_new(
3600 parser->arena,
3601 ++parser->node_id,
3602 0,
3603 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3604 *locals,
3605 TOK2LOC(parser, class_keyword),
3606 constant_path,
3607 NTOK2LOC(parser, inheritance_operator),
3608 superclass,
3609 body,
3610 TOK2LOC(parser, end_keyword),
3611 pm_parser_constant_id_token(parser, name)
3612 );
3613}
3614
3618static pm_class_variable_and_write_node_t *
3619pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3620 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3621
3622 return pm_class_variable_and_write_node_new(
3623 parser->arena,
3624 ++parser->node_id,
3625 0,
3626 PM_LOCATION_INIT_NODES(target, value),
3627 target->name,
3628 target->base.location,
3629 TOK2LOC(parser, operator),
3630 value
3631 );
3632}
3633
3637static pm_class_variable_operator_write_node_t *
3638pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3639 return pm_class_variable_operator_write_node_new(
3640 parser->arena,
3641 ++parser->node_id,
3642 0,
3643 PM_LOCATION_INIT_NODES(target, value),
3644 target->name,
3645 target->base.location,
3646 TOK2LOC(parser, operator),
3647 value,
3648 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3649 );
3650}
3651
3655static pm_class_variable_or_write_node_t *
3656pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3657 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3658
3659 return pm_class_variable_or_write_node_new(
3660 parser->arena,
3661 ++parser->node_id,
3662 0,
3663 PM_LOCATION_INIT_NODES(target, value),
3664 target->name,
3665 target->base.location,
3666 TOK2LOC(parser, operator),
3667 value
3668 );
3669}
3670
3674static pm_class_variable_read_node_t *
3675pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3676 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3677
3678 return pm_class_variable_read_node_new(
3679 parser->arena,
3680 ++parser->node_id,
3681 0,
3682 PM_LOCATION_INIT_TOKEN(parser, token),
3683 pm_parser_constant_id_token(parser, token)
3684 );
3685}
3686
3693static PRISM_INLINE pm_node_flags_t
3694pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3695 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3696 return flags;
3697 }
3698 return 0;
3699}
3700
3704static pm_class_variable_write_node_t *
3705pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3706 return pm_class_variable_write_node_new(
3707 parser->arena,
3708 ++parser->node_id,
3709 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3710 PM_LOCATION_INIT_NODES(read_node, value),
3711 read_node->name,
3712 read_node->base.location,
3713 value,
3714 TOK2LOC(parser, operator)
3715 );
3716}
3717
3721static pm_constant_path_and_write_node_t *
3722pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3723 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3724
3725 return pm_constant_path_and_write_node_new(
3726 parser->arena,
3727 ++parser->node_id,
3728 0,
3729 PM_LOCATION_INIT_NODES(target, value),
3730 target,
3731 TOK2LOC(parser, operator),
3732 value
3733 );
3734}
3735
3739static pm_constant_path_operator_write_node_t *
3740pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3741 return pm_constant_path_operator_write_node_new(
3742 parser->arena,
3743 ++parser->node_id,
3744 0,
3745 PM_LOCATION_INIT_NODES(target, value),
3746 target,
3747 TOK2LOC(parser, operator),
3748 value,
3749 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3750 );
3751}
3752
3756static pm_constant_path_or_write_node_t *
3757pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3758 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3759
3760 return pm_constant_path_or_write_node_new(
3761 parser->arena,
3762 ++parser->node_id,
3763 0,
3764 PM_LOCATION_INIT_NODES(target, value),
3765 target,
3766 TOK2LOC(parser, operator),
3767 value
3768 );
3769}
3770
3774static pm_constant_path_node_t *
3775pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3776 pm_assert_value_expression(parser, parent);
3777
3778 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3779 if (name_token->type == PM_TOKEN_CONSTANT) {
3780 name = pm_parser_constant_id_token(parser, name_token);
3781 }
3782
3783 return pm_constant_path_node_new(
3784 parser->arena,
3785 ++parser->node_id,
3786 0,
3787 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3788 parent,
3789 name,
3790 TOK2LOC(parser, delimiter),
3791 TOK2LOC(parser, name_token)
3792 );
3793}
3794
3798static pm_constant_path_write_node_t *
3799pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3800 return pm_constant_path_write_node_new(
3801 parser->arena,
3802 ++parser->node_id,
3803 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3804 PM_LOCATION_INIT_NODES(target, value),
3805 target,
3806 TOK2LOC(parser, operator),
3807 value
3808 );
3809}
3810
3814static pm_constant_and_write_node_t *
3815pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3816 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3817
3818 return pm_constant_and_write_node_new(
3819 parser->arena,
3820 ++parser->node_id,
3821 0,
3822 PM_LOCATION_INIT_NODES(target, value),
3823 target->name,
3824 target->base.location,
3825 TOK2LOC(parser, operator),
3826 value
3827 );
3828}
3829
3833static pm_constant_operator_write_node_t *
3834pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3835 return pm_constant_operator_write_node_new(
3836 parser->arena,
3837 ++parser->node_id,
3838 0,
3839 PM_LOCATION_INIT_NODES(target, value),
3840 target->name,
3841 target->base.location,
3842 TOK2LOC(parser, operator),
3843 value,
3844 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3845 );
3846}
3847
3851static pm_constant_or_write_node_t *
3852pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3853 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3854
3855 return pm_constant_or_write_node_new(
3856 parser->arena,
3857 ++parser->node_id,
3858 0,
3859 PM_LOCATION_INIT_NODES(target, value),
3860 target->name,
3861 target->base.location,
3862 TOK2LOC(parser, operator),
3863 value
3864 );
3865}
3866
3870static pm_constant_read_node_t *
3871pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3872 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3873
3874 return pm_constant_read_node_new(
3875 parser->arena,
3876 ++parser->node_id,
3877 0,
3878 PM_LOCATION_INIT_TOKEN(parser, name),
3879 pm_parser_constant_id_token(parser, name)
3880 );
3881}
3882
3886static pm_constant_write_node_t *
3887pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3888 return pm_constant_write_node_new(
3889 parser->arena,
3890 ++parser->node_id,
3891 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3892 PM_LOCATION_INIT_NODES(target, value),
3893 target->name,
3894 target->base.location,
3895 value,
3896 TOK2LOC(parser, operator)
3897 );
3898}
3899
3903static void
3904pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3905 switch (PM_NODE_TYPE(node)) {
3906 case PM_BEGIN_NODE: {
3907 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3908 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3909 break;
3910 }
3911 case PM_PARENTHESES_NODE: {
3912 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3913 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3914 break;
3915 }
3916 case PM_STATEMENTS_NODE: {
3917 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3918 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3919 break;
3920 }
3921 case PM_ARRAY_NODE:
3922 case PM_FLOAT_NODE:
3923 case PM_IMAGINARY_NODE:
3924 case PM_INTEGER_NODE:
3925 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3926 case PM_INTERPOLATED_STRING_NODE:
3927 case PM_INTERPOLATED_SYMBOL_NODE:
3928 case PM_INTERPOLATED_X_STRING_NODE:
3929 case PM_RATIONAL_NODE:
3930 case PM_REGULAR_EXPRESSION_NODE:
3931 case PM_SOURCE_ENCODING_NODE:
3932 case PM_SOURCE_FILE_NODE:
3933 case PM_SOURCE_LINE_NODE:
3934 case PM_STRING_NODE:
3935 case PM_SYMBOL_NODE:
3936 case PM_X_STRING_NODE:
3937 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3938 break;
3939 default:
3940 break;
3941 }
3942}
3943
3947static pm_def_node_t *
3948pm_def_node_create(
3949 pm_parser_t *parser,
3950 pm_constant_id_t name,
3951 const pm_token_t *name_loc,
3952 pm_node_t *receiver,
3953 pm_parameters_node_t *parameters,
3954 pm_node_t *body,
3955 pm_constant_id_list_t *locals,
3956 const pm_token_t *def_keyword,
3957 const pm_token_t *operator,
3958 const pm_token_t *lparen,
3959 const pm_token_t *rparen,
3960 const pm_token_t *equal,
3961 const pm_token_t *end_keyword
3962) {
3963 if (receiver != NULL) {
3964 pm_def_node_receiver_check(parser, receiver);
3965 }
3966
3967 return pm_def_node_new(
3968 parser->arena,
3969 ++parser->node_id,
3970 0,
3971 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3972 name,
3973 TOK2LOC(parser, name_loc),
3974 receiver,
3975 parameters,
3976 body,
3977 *locals,
3978 TOK2LOC(parser, def_keyword),
3979 NTOK2LOC(parser, operator),
3980 NTOK2LOC(parser, lparen),
3981 NTOK2LOC(parser, rparen),
3982 NTOK2LOC(parser, equal),
3983 NTOK2LOC(parser, end_keyword)
3984 );
3985}
3986
3990static pm_defined_node_t *
3991pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3992 return pm_defined_node_new(
3993 parser->arena,
3994 ++parser->node_id,
3995 0,
3996 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3997 NTOK2LOC(parser, lparen),
3998 value,
3999 NTOK2LOC(parser, rparen),
4000 TOK2LOC(parser, keyword)
4001 );
4002}
4003
4007static pm_else_node_t *
4008pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4009 return pm_else_node_new(
4010 parser->arena,
4011 ++parser->node_id,
4012 0,
4013 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
4014 TOK2LOC(parser, else_keyword),
4015 statements,
4016 NTOK2LOC(parser, end_keyword)
4017 );
4018}
4019
4023static pm_embedded_statements_node_t *
4024pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
4025 return pm_embedded_statements_node_new(
4026 parser->arena,
4027 ++parser->node_id,
4028 0,
4029 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4030 TOK2LOC(parser, opening),
4031 statements,
4032 TOK2LOC(parser, closing)
4033 );
4034}
4035
4039static pm_embedded_variable_node_t *
4040pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
4041 return pm_embedded_variable_node_new(
4042 parser->arena,
4043 ++parser->node_id,
4044 0,
4045 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
4046 TOK2LOC(parser, operator),
4047 variable
4048 );
4049}
4050
4054static pm_ensure_node_t *
4055pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4056 return pm_ensure_node_new(
4057 parser->arena,
4058 ++parser->node_id,
4059 0,
4060 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
4061 TOK2LOC(parser, ensure_keyword),
4062 statements,
4063 TOK2LOC(parser, end_keyword)
4064 );
4065}
4066
4070static pm_false_node_t *
4071pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4072 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4073
4074 return pm_false_node_new(
4075 parser->arena,
4076 ++parser->node_id,
4077 PM_NODE_FLAG_STATIC_LITERAL,
4078 PM_LOCATION_INIT_TOKEN(parser, token)
4079 );
4080}
4081
4086static pm_find_pattern_node_t *
4087pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4088 assert(nodes->size >= 2);
4089 pm_node_t *left = nodes->nodes[0];
4090 pm_node_t *right = nodes->nodes[nodes->size - 1];
4091
4092 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4093 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4094
4095 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
4096 parser->arena,
4097 ++parser->node_id,
4098 0,
4099 PM_LOCATION_INIT_NODES(left, right),
4100 NULL,
4101 (pm_splat_node_t *) left,
4102 ((pm_node_list_t) { 0 }),
4103 (pm_splat_node_t *) right,
4104 ((pm_location_t) { 0 }),
4105 ((pm_location_t) { 0 })
4106 );
4107
4108 // For now we're going to just copy over each pointer manually. This could be
4109 // much more efficient, as we could instead resize the node list to only point
4110 // to 1...-1.
4111 for (size_t index = 1; index < nodes->size - 1; index++) {
4112 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
4113 }
4114
4115 return node;
4116}
4117
4122static double
4123pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4124 ptrdiff_t diff = token->end - token->start;
4125 if (diff <= 0) return 0.0;
4126
4127 // First, get a buffer of the content.
4128 size_t length = (size_t) diff;
4129 const size_t buffer_size = sizeof(char) * (length + 1);
4130 char *buffer = xmalloc(buffer_size);
4131 memcpy((void *) buffer, token->start, length);
4132
4133 // Next, determine if we need to replace the decimal point because of
4134 // locale-specific options, and then normalize them if we have to.
4135 char decimal_point = *localeconv()->decimal_point;
4136 if (decimal_point != '.') {
4137 for (size_t index = 0; index < length; index++) {
4138 if (buffer[index] == '.') buffer[index] = decimal_point;
4139 }
4140 }
4141
4142 // Next, handle underscores by removing them from the buffer.
4143 for (size_t index = 0; index < length; index++) {
4144 if (buffer[index] == '_') {
4145 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4146 length--;
4147 }
4148 }
4149
4150 // Null-terminate the buffer so that strtod cannot read off the end.
4151 buffer[length] = '\0';
4152
4153 // Now, call strtod to parse the value. Note that CRuby has their own
4154 // version of strtod which avoids locales. We're okay using the locale-aware
4155 // version because we've already validated through the parser that the token
4156 // is in a valid format.
4157 errno = 0;
4158 char *eptr;
4159 double value = strtod(buffer, &eptr);
4160
4161 // This should never happen, because we've already checked that the token
4162 // is in a valid format. However it's good to be safe.
4163 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4164 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
4165 xfree_sized(buffer, buffer_size);
4166 return 0.0;
4167 }
4168
4169 // If errno is set, then it should only be ERANGE. At this point we need to
4170 // check if it's infinity (it should be).
4171 if (errno == ERANGE && PRISM_ISINF(value)) {
4172 int warn_width;
4173 const char *ellipsis;
4174
4175 if (length > 20) {
4176 warn_width = 20;
4177 ellipsis = "...";
4178 } else {
4179 warn_width = (int) length;
4180 ellipsis = "";
4181 }
4182
4183 pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4184 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4185 }
4186
4187 // Finally we can free the buffer and return the value.
4188 xfree_sized(buffer, buffer_size);
4189 return value;
4190}
4191
4195static pm_float_node_t *
4196pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4197 assert(token->type == PM_TOKEN_FLOAT);
4198
4199 return pm_float_node_new(
4200 parser->arena,
4201 ++parser->node_id,
4202 PM_NODE_FLAG_STATIC_LITERAL,
4203 PM_LOCATION_INIT_TOKEN(parser, token),
4204 pm_double_parse(parser, token)
4205 );
4206}
4207
4211static pm_imaginary_node_t *
4212pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4213 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4214
4215 return pm_imaginary_node_new(
4216 parser->arena,
4217 ++parser->node_id,
4218 PM_NODE_FLAG_STATIC_LITERAL,
4219 PM_LOCATION_INIT_TOKEN(parser, token),
4220 UP(pm_float_node_create(parser, &((pm_token_t) {
4221 .type = PM_TOKEN_FLOAT,
4222 .start = token->start,
4223 .end = token->end - 1
4224 })))
4225 );
4226}
4227
4231static pm_rational_node_t *
4232pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4233 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4234
4235 pm_rational_node_t *node = pm_rational_node_new(
4236 parser->arena,
4237 ++parser->node_id,
4238 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4239 PM_LOCATION_INIT_TOKEN(parser, token),
4240 ((pm_integer_t) { 0 }),
4241 ((pm_integer_t) { 0 })
4242 );
4243
4244 const uint8_t *start = token->start;
4245 const uint8_t *end = token->end - 1; // r
4246
4247 while (start < end && *start == '0') start++; // 0.1 -> .1
4248 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4249
4250 size_t length = (size_t) (end - start);
4251 if (length == 1) {
4252 node->denominator.value = 1;
4253 return node;
4254 }
4255
4256 const uint8_t *point = memchr(start, '.', length);
4257 assert(point && "should have a decimal point");
4258
4259 uint8_t *digits = xmalloc(length);
4260 if (digits == NULL) {
4261 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4262 abort();
4263 }
4264
4265 memcpy(digits, start, (unsigned long) (point - start));
4266 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4267 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4268
4269 size_t fract_length = 0;
4270 for (const uint8_t *fract = point; fract < end; ++fract) {
4271 if (*fract != '_') ++fract_length;
4272 }
4273 digits[0] = '1';
4274 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
4275 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
4276 xfree_sized(digits, length);
4277
4278 pm_integers_reduce(&node->numerator, &node->denominator);
4279 pm_integer_arena_move(parser->arena, &node->numerator);
4280 pm_integer_arena_move(parser->arena, &node->denominator);
4281 return node;
4282}
4283
4288static pm_imaginary_node_t *
4289pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4290 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4291
4292 return pm_imaginary_node_new(
4293 parser->arena,
4294 ++parser->node_id,
4295 PM_NODE_FLAG_STATIC_LITERAL,
4296 PM_LOCATION_INIT_TOKEN(parser, token),
4297 UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4298 .type = PM_TOKEN_FLOAT_RATIONAL,
4299 .start = token->start,
4300 .end = token->end - 1
4301 })))
4302 );
4303}
4304
4308static pm_for_node_t *
4309pm_for_node_create(
4310 pm_parser_t *parser,
4311 pm_node_t *index,
4312 pm_node_t *collection,
4313 pm_statements_node_t *statements,
4314 const pm_token_t *for_keyword,
4315 const pm_token_t *in_keyword,
4316 const pm_token_t *do_keyword,
4317 const pm_token_t *end_keyword
4318) {
4319 return pm_for_node_new(
4320 parser->arena,
4321 ++parser->node_id,
4322 0,
4323 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4324 index,
4325 collection,
4326 statements,
4327 TOK2LOC(parser, for_keyword),
4328 TOK2LOC(parser, in_keyword),
4329 NTOK2LOC(parser, do_keyword),
4330 TOK2LOC(parser, end_keyword)
4331 );
4332}
4333
4337static pm_forwarding_arguments_node_t *
4338pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4339 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4340
4341 return pm_forwarding_arguments_node_new(
4342 parser->arena,
4343 ++parser->node_id,
4344 0,
4345 PM_LOCATION_INIT_TOKEN(parser, token)
4346 );
4347}
4348
4352static pm_forwarding_parameter_node_t *
4353pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4354 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4355
4356 return pm_forwarding_parameter_node_new(
4357 parser->arena,
4358 ++parser->node_id,
4359 0,
4360 PM_LOCATION_INIT_TOKEN(parser, token)
4361 );
4362}
4363
4367static pm_forwarding_super_node_t *
4368pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4369 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4370 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4371
4372 pm_block_node_t *block = NULL;
4373 if (arguments->block != NULL) {
4374 block = (pm_block_node_t *) arguments->block;
4375 }
4376
4377 return pm_forwarding_super_node_new(
4378 parser->arena,
4379 ++parser->node_id,
4380 0,
4381 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
4382 PM_LOCATION_INIT_TOKEN(parser, token),
4383 block
4384 );
4385}
4386
4391static pm_hash_pattern_node_t *
4392pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4393 return pm_hash_pattern_node_new(
4394 parser->arena,
4395 ++parser->node_id,
4396 0,
4397 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4398 NULL,
4399 ((pm_node_list_t) { 0 }),
4400 NULL,
4401 TOK2LOC(parser, opening),
4402 TOK2LOC(parser, closing)
4403 );
4404}
4405
4409static pm_hash_pattern_node_t *
4410pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4411 uint32_t start;
4412 uint32_t end;
4413
4414 if (elements->size > 0) {
4415 if (rest) {
4416 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4417 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4418 } else {
4419 start = PM_NODE_START(elements->nodes[0]);
4420 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4421 }
4422 } else {
4423 assert(rest != NULL);
4424 start = PM_NODE_START(rest);
4425 end = PM_NODE_END(rest);
4426 }
4427
4428 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4429 parser->arena,
4430 ++parser->node_id,
4431 0,
4432 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4433 NULL,
4434 ((pm_node_list_t) { 0 }),
4435 rest,
4436 ((pm_location_t) { 0 }),
4437 ((pm_location_t) { 0 })
4438 );
4439
4440 pm_node_list_concat(parser->arena, &node->elements, elements);
4441 return node;
4442}
4443
4447static pm_constant_id_t
4448pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4449 switch (PM_NODE_TYPE(target)) {
4450 case PM_GLOBAL_VARIABLE_READ_NODE:
4451 return ((pm_global_variable_read_node_t *) target)->name;
4452 case PM_BACK_REFERENCE_READ_NODE:
4453 return ((pm_back_reference_read_node_t *) target)->name;
4454 case PM_NUMBERED_REFERENCE_READ_NODE:
4455 // This will only ever happen in the event of a syntax error, but we
4456 // still need to provide something for the node.
4457 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4458 default:
4459 assert(false && "unreachable");
4460 return (pm_constant_id_t) -1;
4461 }
4462}
4463
4467static pm_global_variable_and_write_node_t *
4468pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4469 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4470
4471 return pm_global_variable_and_write_node_new(
4472 parser->arena,
4473 ++parser->node_id,
4474 0,
4475 PM_LOCATION_INIT_NODES(target, value),
4476 pm_global_variable_write_name(parser, target),
4477 target->location,
4478 TOK2LOC(parser, operator),
4479 value
4480 );
4481}
4482
4486static pm_global_variable_operator_write_node_t *
4487pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4488 return pm_global_variable_operator_write_node_new(
4489 parser->arena,
4490 ++parser->node_id,
4491 0,
4492 PM_LOCATION_INIT_NODES(target, value),
4493 pm_global_variable_write_name(parser, target),
4494 target->location,
4495 TOK2LOC(parser, operator),
4496 value,
4497 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4498 );
4499}
4500
4504static pm_global_variable_or_write_node_t *
4505pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4506 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4507
4508 return pm_global_variable_or_write_node_new(
4509 parser->arena,
4510 ++parser->node_id,
4511 0,
4512 PM_LOCATION_INIT_NODES(target, value),
4513 pm_global_variable_write_name(parser, target),
4514 target->location,
4515 TOK2LOC(parser, operator),
4516 value
4517 );
4518}
4519
4523static pm_global_variable_read_node_t *
4524pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4525 return pm_global_variable_read_node_new(
4526 parser->arena,
4527 ++parser->node_id,
4528 0,
4529 PM_LOCATION_INIT_TOKEN(parser, name),
4530 pm_parser_constant_id_token(parser, name)
4531 );
4532}
4533
4537static pm_global_variable_read_node_t *
4538pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4539 return pm_global_variable_read_node_new(
4540 parser->arena,
4541 ++parser->node_id,
4542 0,
4543 PM_LOCATION_INIT_UNSET,
4544 name
4545 );
4546}
4547
4551static pm_global_variable_write_node_t *
4552pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 return pm_global_variable_write_node_new(
4554 parser->arena,
4555 ++parser->node_id,
4556 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4557 PM_LOCATION_INIT_NODES(target, value),
4558 pm_global_variable_write_name(parser, target),
4559 target->location,
4560 value,
4561 TOK2LOC(parser, operator)
4562 );
4563}
4564
4568static pm_global_variable_write_node_t *
4569pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4570 return pm_global_variable_write_node_new(
4571 parser->arena,
4572 ++parser->node_id,
4573 0,
4574 PM_LOCATION_INIT_UNSET,
4575 name,
4576 ((pm_location_t) { 0 }),
4577 value,
4578 ((pm_location_t) { 0 })
4579 );
4580}
4581
4585static pm_hash_node_t *
4586pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4587 assert(opening != NULL);
4588
4589 return pm_hash_node_new(
4590 parser->arena,
4591 ++parser->node_id,
4592 PM_NODE_FLAG_STATIC_LITERAL,
4593 PM_LOCATION_INIT_TOKEN(parser, opening),
4594 TOK2LOC(parser, opening),
4595 ((pm_node_list_t) { 0 }),
4596 ((pm_location_t) { 0 })
4597 );
4598}
4599
4603static PRISM_INLINE void
4604pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4605 pm_node_list_append(arena, &hash->elements, element);
4606
4607 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4608 if (static_literal) {
4609 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4610 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4611 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4612 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4613 }
4614
4615 if (!static_literal) {
4616 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4617 }
4618}
4619
4620static PRISM_INLINE void
4621pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4622 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4623 hash->closing_loc = TOK2LOC(parser, token);
4624}
4625
4629static pm_if_node_t *
4630pm_if_node_create(pm_parser_t *parser,
4631 const pm_token_t *if_keyword,
4632 pm_node_t *predicate,
4633 const pm_token_t *then_keyword,
4634 pm_statements_node_t *statements,
4635 pm_node_t *subsequent,
4636 const pm_token_t *end_keyword
4637) {
4638 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4639
4640 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4641 uint32_t end;
4642
4643 if (end_keyword != NULL) {
4644 end = PM_TOKEN_END(parser, end_keyword);
4645 } else if (subsequent != NULL) {
4646 end = PM_NODE_END(subsequent);
4647 } else if (pm_statements_node_body_length(statements) != 0) {
4648 end = PM_NODE_END(statements);
4649 } else {
4650 end = PM_NODE_END(predicate);
4651 }
4652
4653 return pm_if_node_new(
4654 parser->arena,
4655 ++parser->node_id,
4656 PM_NODE_FLAG_NEWLINE,
4657 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4658 TOK2LOC(parser, if_keyword),
4659 predicate,
4660 NTOK2LOC(parser, then_keyword),
4661 statements,
4662 subsequent,
4663 NTOK2LOC(parser, end_keyword)
4664 );
4665}
4666
4670static pm_if_node_t *
4671pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4672 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4673
4674 pm_statements_node_t *statements = pm_statements_node_create(parser);
4675 pm_statements_node_body_append(parser, statements, statement, true);
4676
4677 return pm_if_node_new(
4678 parser->arena,
4679 ++parser->node_id,
4680 PM_NODE_FLAG_NEWLINE,
4681 PM_LOCATION_INIT_NODES(statement, predicate),
4682 TOK2LOC(parser, if_keyword),
4683 predicate,
4684 ((pm_location_t) { 0 }),
4685 statements,
4686 NULL,
4687 ((pm_location_t) { 0 })
4688 );
4689}
4690
4694static pm_if_node_t *
4695pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4696 pm_assert_value_expression(parser, predicate);
4697 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4698
4699 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4700 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4701
4702 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4703 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4704
4705 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4706 return pm_if_node_new(
4707 parser->arena,
4708 ++parser->node_id,
4709 PM_NODE_FLAG_NEWLINE,
4710 PM_LOCATION_INIT_NODES(predicate, false_expression),
4711 ((pm_location_t) { 0 }),
4712 predicate,
4713 TOK2LOC(parser, qmark),
4714 if_statements,
4715 UP(else_node),
4716 ((pm_location_t) { 0 })
4717 );
4718}
4719
4720static PRISM_INLINE void
4721pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4722 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4723 node->end_keyword_loc = TOK2LOC(parser, keyword);
4724}
4725
4726static PRISM_INLINE void
4727pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4728 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4729 node->end_keyword_loc = TOK2LOC(parser, keyword);
4730}
4731
4735static pm_implicit_node_t *
4736pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4737 return pm_implicit_node_new(
4738 parser->arena,
4739 ++parser->node_id,
4740 0,
4741 PM_LOCATION_INIT_NODE(value),
4742 value
4743 );
4744}
4745
4749static pm_implicit_rest_node_t *
4750pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4751 assert(token->type == PM_TOKEN_COMMA);
4752
4753 return pm_implicit_rest_node_new(
4754 parser->arena,
4755 ++parser->node_id,
4756 0,
4757 PM_LOCATION_INIT_TOKEN(parser, token)
4758 );
4759}
4760
4764static pm_integer_node_t *
4765pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4766 assert(token->type == PM_TOKEN_INTEGER);
4767
4768 pm_integer_node_t *node = pm_integer_node_new(
4769 parser->arena,
4770 ++parser->node_id,
4771 base | PM_NODE_FLAG_STATIC_LITERAL,
4772 PM_LOCATION_INIT_TOKEN(parser, token),
4773 ((pm_integer_t) { 0 })
4774 );
4775
4776 if (parser->integer.lexed) {
4777 // The value was already computed during lexing.
4778 node->value.value = parser->integer.value;
4779 parser->integer.lexed = false;
4780 } else {
4781 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4782 switch (base) {
4783 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4784 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4785 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4786 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4787 default: assert(false && "unreachable"); break;
4788 }
4789
4790 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4791 pm_integer_arena_move(parser->arena, &node->value);
4792 }
4793
4794 return node;
4795}
4796
4801static pm_imaginary_node_t *
4802pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4803 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4804
4805 return pm_imaginary_node_new(
4806 parser->arena,
4807 ++parser->node_id,
4808 PM_NODE_FLAG_STATIC_LITERAL,
4809 PM_LOCATION_INIT_TOKEN(parser, token),
4810 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4811 .type = PM_TOKEN_INTEGER,
4812 .start = token->start,
4813 .end = token->end - 1
4814 })))
4815 );
4816}
4817
4822static pm_rational_node_t *
4823pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4824 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4825
4826 pm_rational_node_t *node = pm_rational_node_new(
4827 parser->arena,
4828 ++parser->node_id,
4829 base | PM_NODE_FLAG_STATIC_LITERAL,
4830 PM_LOCATION_INIT_TOKEN(parser, token),
4831 ((pm_integer_t) { 0 }),
4832 ((pm_integer_t) { .value = 1 })
4833 );
4834
4835 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4836 switch (base) {
4837 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4838 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4839 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4840 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4841 default: assert(false && "unreachable"); break;
4842 }
4843
4844 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4845 pm_integer_arena_move(parser->arena, &node->numerator);
4846
4847 return node;
4848}
4849
4854static pm_imaginary_node_t *
4855pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4856 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4857
4858 return pm_imaginary_node_new(
4859 parser->arena,
4860 ++parser->node_id,
4861 PM_NODE_FLAG_STATIC_LITERAL,
4862 PM_LOCATION_INIT_TOKEN(parser, token),
4863 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4864 .type = PM_TOKEN_INTEGER_RATIONAL,
4865 .start = token->start,
4866 .end = token->end - 1
4867 })))
4868 );
4869}
4870
4874static pm_in_node_t *
4875pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4876 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4877 uint32_t end;
4878
4879 if (statements != NULL) {
4880 end = PM_NODE_END(statements);
4881 } else if (then_keyword != NULL) {
4882 end = PM_TOKEN_END(parser, then_keyword);
4883 } else {
4884 end = PM_NODE_END(pattern);
4885 }
4886
4887 return pm_in_node_new(
4888 parser->arena,
4889 ++parser->node_id,
4890 0,
4891 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4892 pattern,
4893 statements,
4894 TOK2LOC(parser, in_keyword),
4895 NTOK2LOC(parser, then_keyword)
4896 );
4897}
4898
4902static pm_instance_variable_and_write_node_t *
4903pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4904 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4905
4906 return pm_instance_variable_and_write_node_new(
4907 parser->arena,
4908 ++parser->node_id,
4909 0,
4910 PM_LOCATION_INIT_NODES(target, value),
4911 target->name,
4912 target->base.location,
4913 TOK2LOC(parser, operator),
4914 value
4915 );
4916}
4917
4921static pm_instance_variable_operator_write_node_t *
4922pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4923 return pm_instance_variable_operator_write_node_new(
4924 parser->arena,
4925 ++parser->node_id,
4926 0,
4927 PM_LOCATION_INIT_NODES(target, value),
4928 target->name,
4929 target->base.location,
4930 TOK2LOC(parser, operator),
4931 value,
4932 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4933 );
4934}
4935
4939static pm_instance_variable_or_write_node_t *
4940pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4941 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4942
4943 return pm_instance_variable_or_write_node_new(
4944 parser->arena,
4945 ++parser->node_id,
4946 0,
4947 PM_LOCATION_INIT_NODES(target, value),
4948 target->name,
4949 target->base.location,
4950 TOK2LOC(parser, operator),
4951 value
4952 );
4953}
4954
4958static pm_instance_variable_read_node_t *
4959pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4960 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4961
4962 return pm_instance_variable_read_node_new(
4963 parser->arena,
4964 ++parser->node_id,
4965 0,
4966 PM_LOCATION_INIT_TOKEN(parser, token),
4967 pm_parser_constant_id_token(parser, token)
4968 );
4969}
4970
4975static pm_instance_variable_write_node_t *
4976pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4977 return pm_instance_variable_write_node_new(
4978 parser->arena,
4979 ++parser->node_id,
4980 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4981 PM_LOCATION_INIT_NODES(read_node, value),
4982 read_node->name,
4983 read_node->base.location,
4984 value,
4985 TOK2LOC(parser, operator)
4986 );
4987}
4988
4994static void
4995pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4996 switch (PM_NODE_TYPE(part)) {
4997 case PM_STRING_NODE:
4998 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4999 break;
5000 case PM_EMBEDDED_STATEMENTS_NODE: {
5001 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5002 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5003
5004 if (embedded == NULL) {
5005 // If there are no statements or more than one statement, then
5006 // we lose the static literal flag.
5007 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5008 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5009 // If the embedded statement is a string, then we can keep the
5010 // static literal flag and mark the string as frozen.
5011 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5012 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5013 // If the embedded statement is an interpolated string and it's
5014 // a static literal, then we can keep the static literal flag.
5015 } else {
5016 // Otherwise we lose the static literal flag.
5017 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5018 }
5019
5020 break;
5021 }
5022 case PM_EMBEDDED_VARIABLE_NODE:
5023 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5024 break;
5025 default:
5026 assert(false && "unexpected node type");
5027 break;
5028 }
5029
5030 pm_node_list_append(arena, parts, part);
5031}
5032
5036static pm_interpolated_regular_expression_node_t *
5037pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5038 return pm_interpolated_regular_expression_node_new(
5039 parser->arena,
5040 ++parser->node_id,
5041 PM_NODE_FLAG_STATIC_LITERAL,
5042 PM_LOCATION_INIT_TOKEN(parser, opening),
5043 TOK2LOC(parser, opening),
5044 ((pm_node_list_t) { 0 }),
5045 TOK2LOC(parser, opening)
5046 );
5047}
5048
5049static PRISM_INLINE void
5050pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5051 if (PM_NODE_START(node) > PM_NODE_START(part)) {
5052 PM_NODE_START_SET_NODE(node, part);
5053 }
5054 if (PM_NODE_END(node) < PM_NODE_END(part)) {
5055 PM_NODE_LENGTH_SET_NODE(node, part);
5056 }
5057
5058 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5059}
5060
5061static PRISM_INLINE void
5062pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5063 node->closing_loc = TOK2LOC(parser, closing);
5064 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5065 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
5066}
5067
5091static PRISM_INLINE void
5092pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5093 pm_arena_t *arena = parser->arena;
5094#define CLEAR_FLAGS(node) \
5095 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5096
5097#define MUTABLE_FLAGS(node) \
5098 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5099
5100 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5101 PM_NODE_START_SET_NODE(node, part);
5102 }
5103
5104 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5105 PM_NODE_LENGTH_SET_NODE(node, part);
5106 }
5107
5108 switch (PM_NODE_TYPE(part)) {
5109 case PM_STRING_NODE:
5110 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5111 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5112 // as long as this interpolation only consists of other string literals.
5113 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5114 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5115 }
5116 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5117 break;
5118 case PM_INTERPOLATED_STRING_NODE:
5119 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5120 // If the string that we're concatenating is a static literal,
5121 // then we can keep the static literal flag for this string.
5122 } else {
5123 // Otherwise, we lose the static literal flag here and we should
5124 // also clear the mutability flags.
5125 CLEAR_FLAGS(node);
5126 }
5127 break;
5128 case PM_EMBEDDED_STATEMENTS_NODE: {
5129 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5130 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5131
5132 if (embedded == NULL) {
5133 // If we're embedding multiple statements or no statements, then
5134 // the string is not longer a static literal.
5135 CLEAR_FLAGS(node);
5136 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5137 // If the embedded statement is a string, then we can make that
5138 // string as frozen and static literal, and not touch the static
5139 // literal status of this string.
5140 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5141
5142 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5143 MUTABLE_FLAGS(node);
5144 }
5145 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5146 // If the embedded statement is an interpolated string, but that
5147 // string is marked as static literal, then we can keep our
5148 // static literal status for this string.
5149 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5150 MUTABLE_FLAGS(node);
5151 }
5152 } else {
5153 // In all other cases, we lose the static literal flag here and
5154 // become mutable.
5155 CLEAR_FLAGS(node);
5156 }
5157
5158 break;
5159 }
5160 case PM_EMBEDDED_VARIABLE_NODE:
5161 // Embedded variables clear static literal, which means we also
5162 // should clear the mutability flags.
5163 CLEAR_FLAGS(node);
5164 break;
5165 case PM_X_STRING_NODE:
5166 case PM_INTERPOLATED_X_STRING_NODE:
5167 case PM_SYMBOL_NODE:
5168 case PM_INTERPOLATED_SYMBOL_NODE:
5169 // These will only happen in error cases. But we want to handle it
5170 // here so that we don't fail the assertion.
5171 CLEAR_FLAGS(node);
5172 pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
5173 return;
5174 case PM_ERROR_RECOVERY_NODE:
5175 CLEAR_FLAGS(node);
5176 break;
5177 default:
5178 assert(false && "unexpected node type");
5179 break;
5180 }
5181
5182 pm_node_list_append(arena, &node->parts, part);
5183
5184#undef CLEAR_FLAGS
5185#undef MUTABLE_FLAGS
5186}
5187
5191static pm_interpolated_string_node_t *
5192pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5193 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5194
5195 switch (parser->frozen_string_literal) {
5196 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5197 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5198 break;
5199 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5200 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5201 break;
5202 }
5203
5204 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5205 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5206
5207 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
5208 parser->arena,
5209 ++parser->node_id,
5210 flags,
5211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5212 NTOK2LOC(parser, opening),
5213 ((pm_node_list_t) { 0 }),
5214 NTOK2LOC(parser, closing)
5215 );
5216
5217 if (parts != NULL) {
5218 pm_node_t *part;
5219 PM_NODE_LIST_FOREACH(parts, index, part) {
5220 pm_interpolated_string_node_append(parser, node, part);
5221 }
5222 }
5223
5224 return node;
5225}
5226
5230static void
5231pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5232 node->closing_loc = TOK2LOC(parser, closing);
5233 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5234}
5235
5236static void
5237pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5238 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5239 PM_NODE_START_SET_NODE(node, part);
5240 }
5241
5242 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5243
5244 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5245 PM_NODE_LENGTH_SET_NODE(node, part);
5246 }
5247}
5248
5249static void
5250pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5251 node->closing_loc = TOK2LOC(parser, closing);
5252 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5253}
5254
5258static pm_interpolated_symbol_node_t *
5259pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5260 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5261 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5262
5263 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
5264 parser->arena,
5265 ++parser->node_id,
5266 PM_NODE_FLAG_STATIC_LITERAL,
5267 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5268 NTOK2LOC(parser, opening),
5269 ((pm_node_list_t) { 0 }),
5270 NTOK2LOC(parser, closing)
5271 );
5272
5273 if (parts != NULL) {
5274 pm_node_t *part;
5275 PM_NODE_LIST_FOREACH(parts, index, part) {
5276 pm_interpolated_symbol_node_append(parser->arena, node, part);
5277 }
5278 }
5279
5280 return node;
5281}
5282
5286static pm_interpolated_x_string_node_t *
5287pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5288 return pm_interpolated_x_string_node_new(
5289 parser->arena,
5290 ++parser->node_id,
5291 0,
5292 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5293 TOK2LOC(parser, opening),
5294 ((pm_node_list_t) { 0 }),
5295 TOK2LOC(parser, closing)
5296 );
5297}
5298
5299static PRISM_INLINE void
5300pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5301 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5302 PM_NODE_LENGTH_SET_NODE(node, part);
5303}
5304
5305static PRISM_INLINE void
5306pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5307 node->closing_loc = TOK2LOC(parser, closing);
5308 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5309}
5310
5314static pm_it_local_variable_read_node_t *
5315pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5316 return pm_it_local_variable_read_node_new(
5317 parser->arena,
5318 ++parser->node_id,
5319 0,
5320 PM_LOCATION_INIT_TOKEN(parser, name)
5321 );
5322}
5323
5327static pm_it_parameters_node_t *
5328pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5329 return pm_it_parameters_node_new(
5330 parser->arena,
5331 ++parser->node_id,
5332 0,
5333 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
5334 );
5335}
5336
5340static pm_keyword_hash_node_t *
5341pm_keyword_hash_node_create(pm_parser_t *parser) {
5342 return pm_keyword_hash_node_new(
5343 parser->arena,
5344 ++parser->node_id,
5345 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5346 PM_LOCATION_INIT_UNSET,
5347 ((pm_node_list_t) { 0 })
5348 );
5349}
5350
5354static void
5355pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5356 // If the element being added is not an AssocNode or does not have a symbol
5357 // key, then we want to turn the SYMBOL_KEYS flag off.
5358 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5359 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5360 }
5361
5362 pm_node_list_append(arena, &hash->elements, element);
5363 if (PM_NODE_LENGTH(hash) == 0) {
5364 PM_NODE_START_SET_NODE(hash, element);
5365 }
5366 PM_NODE_LENGTH_SET_NODE(hash, element);
5367}
5368
5372static pm_required_keyword_parameter_node_t *
5373pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5374 return pm_required_keyword_parameter_node_new(
5375 parser->arena,
5376 ++parser->node_id,
5377 0,
5378 PM_LOCATION_INIT_TOKEN(parser, name),
5379 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5380 TOK2LOC(parser, name)
5381 );
5382}
5383
5387static pm_optional_keyword_parameter_node_t *
5388pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5389 return pm_optional_keyword_parameter_node_new(
5390 parser->arena,
5391 ++parser->node_id,
5392 0,
5393 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5394 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5395 TOK2LOC(parser, name),
5396 value
5397 );
5398}
5399
5403static pm_keyword_rest_parameter_node_t *
5404pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5405 return pm_keyword_rest_parameter_node_new(
5406 parser->arena,
5407 ++parser->node_id,
5408 0,
5409 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5410 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5411 NTOK2LOC(parser, name),
5412 TOK2LOC(parser, operator)
5413 );
5414}
5415
5419static pm_lambda_node_t *
5420pm_lambda_node_create(
5421 pm_parser_t *parser,
5422 pm_constant_id_list_t *locals,
5423 const pm_token_t *operator,
5424 const pm_token_t *opening,
5425 const pm_token_t *closing,
5426 pm_node_t *parameters,
5427 pm_node_t *body
5428) {
5429 return pm_lambda_node_new(
5430 parser->arena,
5431 ++parser->node_id,
5432 0,
5433 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5434 *locals,
5435 TOK2LOC(parser, operator),
5436 TOK2LOC(parser, opening),
5437 TOK2LOC(parser, closing),
5438 parameters,
5439 body
5440 );
5441}
5442
5446static pm_local_variable_and_write_node_t *
5447pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5448 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5449 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5450
5451 return pm_local_variable_and_write_node_new(
5452 parser->arena,
5453 ++parser->node_id,
5454 0,
5455 PM_LOCATION_INIT_NODES(target, value),
5456 target->location,
5457 TOK2LOC(parser, operator),
5458 value,
5459 name,
5460 depth
5461 );
5462}
5463
5467static pm_local_variable_operator_write_node_t *
5468pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5469 return pm_local_variable_operator_write_node_new(
5470 parser->arena,
5471 ++parser->node_id,
5472 0,
5473 PM_LOCATION_INIT_NODES(target, value),
5474 target->location,
5475 TOK2LOC(parser, operator),
5476 value,
5477 name,
5478 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5479 depth
5480 );
5481}
5482
5486static pm_local_variable_or_write_node_t *
5487pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5488 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5489 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5490
5491 return pm_local_variable_or_write_node_new(
5492 parser->arena,
5493 ++parser->node_id,
5494 0,
5495 PM_LOCATION_INIT_NODES(target, value),
5496 target->location,
5497 TOK2LOC(parser, operator),
5498 value,
5499 name,
5500 depth
5501 );
5502}
5503
5507static pm_local_variable_read_node_t *
5508pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5509 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5510
5511 return pm_local_variable_read_node_new(
5512 parser->arena,
5513 ++parser->node_id,
5514 0,
5515 PM_LOCATION_INIT_TOKEN(parser, name),
5516 name_id,
5517 depth
5518 );
5519}
5520
5524static pm_local_variable_read_node_t *
5525pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5526 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5527 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5528}
5529
5534static pm_local_variable_read_node_t *
5535pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5536 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5537 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5538}
5539
5543static pm_local_variable_write_node_t *
5544pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5545 return pm_local_variable_write_node_new(
5546 parser->arena,
5547 ++parser->node_id,
5548 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5549 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5550 name,
5551 depth,
5552 *name_loc,
5553 value,
5554 TOK2LOC(parser, operator)
5555 );
5556}
5557
5561static PRISM_INLINE bool
5562pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5563 return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5564}
5565
5570static PRISM_INLINE bool
5571pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5572 return (
5573 (length == 2) &&
5574 (parser->start[start] == '_') &&
5575 (parser->start[start + 1] != '0') &&
5576 pm_char_is_decimal_digit(parser->start[start + 1])
5577 );
5578}
5579
5584static PRISM_INLINE void
5585pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5586 if (pm_token_is_numbered_parameter(parser, start, length)) {
5587 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5588 }
5589}
5590
5595static pm_local_variable_target_node_t *
5596pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5597 pm_refute_numbered_parameter(parser, location->start, location->length);
5598
5599 return pm_local_variable_target_node_new(
5600 parser->arena,
5601 ++parser->node_id,
5602 0,
5603 ((pm_location_t) { .start = location->start, .length = location->length }),
5604 name,
5605 depth
5606 );
5607}
5608
5612static pm_match_predicate_node_t *
5613pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5614 pm_assert_value_expression(parser, value);
5615
5616 return pm_match_predicate_node_new(
5617 parser->arena,
5618 ++parser->node_id,
5619 0,
5620 PM_LOCATION_INIT_NODES(value, pattern),
5621 value,
5622 pattern,
5623 TOK2LOC(parser, operator)
5624 );
5625}
5626
5630static pm_match_required_node_t *
5631pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5632 pm_assert_value_expression(parser, value);
5633
5634 return pm_match_required_node_new(
5635 parser->arena,
5636 ++parser->node_id,
5637 0,
5638 PM_LOCATION_INIT_NODES(value, pattern),
5639 value,
5640 pattern,
5641 TOK2LOC(parser, operator)
5642 );
5643}
5644
5648static pm_match_write_node_t *
5649pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5650 return pm_match_write_node_new(
5651 parser->arena,
5652 ++parser->node_id,
5653 0,
5654 PM_LOCATION_INIT_NODE(call),
5655 call,
5656 ((pm_node_list_t) { 0 })
5657 );
5658}
5659
5663static pm_module_node_t *
5664pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5665 return pm_module_node_new(
5666 parser->arena,
5667 ++parser->node_id,
5668 0,
5669 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5670 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5671 TOK2LOC(parser, module_keyword),
5672 constant_path,
5673 body,
5674 TOK2LOC(parser, end_keyword),
5675 pm_parser_constant_id_token(parser, name)
5676 );
5677}
5678
5682static pm_multi_target_node_t *
5683pm_multi_target_node_create(pm_parser_t *parser) {
5684 return pm_multi_target_node_new(
5685 parser->arena,
5686 ++parser->node_id,
5687 0,
5688 PM_LOCATION_INIT_UNSET,
5689 ((pm_node_list_t) { 0 }),
5690 NULL,
5691 ((pm_node_list_t) { 0 }),
5692 ((pm_location_t) { 0 }),
5693 ((pm_location_t) { 0 })
5694 );
5695}
5696
5700static void
5701pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5702 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5703 if (node->rest == NULL) {
5704 node->rest = target;
5705 } else {
5706 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5707 pm_node_list_append(parser->arena, &node->rights, target);
5708 }
5709 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5710 if (node->rest == NULL) {
5711 node->rest = target;
5712 } else {
5713 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5714 pm_node_list_append(parser->arena, &node->rights, target);
5715 }
5716 } else if (node->rest == NULL) {
5717 pm_node_list_append(parser->arena, &node->lefts, target);
5718 } else {
5719 pm_node_list_append(parser->arena, &node->rights, target);
5720 }
5721
5722 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5723 PM_NODE_START_SET_NODE(node, target);
5724 }
5725
5726 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5727 PM_NODE_LENGTH_SET_NODE(node, target);
5728 }
5729}
5730
5734static void
5735pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5736 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5737 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5738 node->lparen_loc = TOK2LOC(parser, lparen);
5739}
5740
5744static void
5745pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5746 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5747 node->rparen_loc = TOK2LOC(parser, rparen);
5748}
5749
5753static pm_multi_write_node_t *
5754pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5755 /* The target is no longer necessary because we have reused its children. It
5756 * is arena-allocated so no explicit free is needed. */
5757 return pm_multi_write_node_new(
5758 parser->arena,
5759 ++parser->node_id,
5760 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5761 PM_LOCATION_INIT_NODES(target, value),
5762 target->lefts,
5763 target->rest,
5764 target->rights,
5765 target->lparen_loc,
5766 target->rparen_loc,
5767 TOK2LOC(parser, operator),
5768 value
5769 );
5770}
5771
5775static pm_next_node_t *
5776pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5777 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5778
5779 return pm_next_node_new(
5780 parser->arena,
5781 ++parser->node_id,
5782 0,
5783 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5784 arguments,
5785 TOK2LOC(parser, keyword)
5786 );
5787}
5788
5792static pm_nil_node_t *
5793pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5794 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5795
5796 return pm_nil_node_new(
5797 parser->arena,
5798 ++parser->node_id,
5799 PM_NODE_FLAG_STATIC_LITERAL,
5800 PM_LOCATION_INIT_TOKEN(parser, token)
5801 );
5802}
5803
5807static pm_no_block_parameter_node_t *
5808pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5809 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5810 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5811
5812 return pm_no_block_parameter_node_new(
5813 parser->arena,
5814 ++parser->node_id,
5815 0,
5816 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5817 TOK2LOC(parser, operator),
5818 TOK2LOC(parser, keyword)
5819 );
5820}
5821
5825static pm_no_keywords_parameter_node_t *
5826pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5827 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5828 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5829
5830 return pm_no_keywords_parameter_node_new(
5831 parser->arena,
5832 ++parser->node_id,
5833 0,
5834 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5835 TOK2LOC(parser, operator),
5836 TOK2LOC(parser, keyword)
5837 );
5838}
5839
5843static pm_numbered_parameters_node_t *
5844pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5845 return pm_numbered_parameters_node_new(
5846 parser->arena,
5847 ++parser->node_id,
5848 0,
5849 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5850 maximum
5851 );
5852}
5853
5858#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5859
5866static uint32_t
5867pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5868 const uint8_t *start = token->start + 1;
5869 const uint8_t *end = token->end;
5870
5871 ptrdiff_t diff = end - start;
5872 assert(diff > 0);
5873#if PTRDIFF_MAX > SIZE_MAX
5874 assert(diff < (ptrdiff_t) SIZE_MAX);
5875#endif
5876 size_t length = (size_t) diff;
5877
5878 char *digits = xcalloc(length + 1, sizeof(char));
5879 memcpy(digits, start, length);
5880 digits[length] = '\0';
5881
5882 char *endptr;
5883 errno = 0;
5884 unsigned long value = strtoul(digits, &endptr, 10);
5885
5886 if ((digits == endptr) || (*endptr != '\0')) {
5887 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5888 value = 0;
5889 }
5890
5891 xfree_sized(digits, sizeof(char) * (length + 1));
5892
5893 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5894 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5895 value = 0;
5896 }
5897
5898 return (uint32_t) value;
5899}
5900
5901#undef NTH_REF_MAX
5902
5906static pm_numbered_reference_read_node_t *
5907pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5908 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5909
5910 return pm_numbered_reference_read_node_new(
5911 parser->arena,
5912 ++parser->node_id,
5913 0,
5914 PM_LOCATION_INIT_TOKEN(parser, name),
5915 pm_numbered_reference_read_node_number(parser, name)
5916 );
5917}
5918
5922static pm_optional_parameter_node_t *
5923pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5924 return pm_optional_parameter_node_new(
5925 parser->arena,
5926 ++parser->node_id,
5927 0,
5928 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5929 pm_parser_constant_id_token(parser, name),
5930 TOK2LOC(parser, name),
5931 TOK2LOC(parser, operator),
5932 value
5933 );
5934}
5935
5939static pm_or_node_t *
5940pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5941 pm_assert_value_expression(parser, left);
5942
5943 return pm_or_node_new(
5944 parser->arena,
5945 ++parser->node_id,
5946 0,
5947 PM_LOCATION_INIT_NODES(left, right),
5948 left,
5949 right,
5950 TOK2LOC(parser, operator)
5951 );
5952}
5953
5957static pm_parameters_node_t *
5958pm_parameters_node_create(pm_parser_t *parser) {
5959 return pm_parameters_node_new(
5960 parser->arena,
5961 ++parser->node_id,
5962 0,
5963 PM_LOCATION_INIT_UNSET,
5964 ((pm_node_list_t) { 0 }),
5965 ((pm_node_list_t) { 0 }),
5966 NULL,
5967 ((pm_node_list_t) { 0 }),
5968 ((pm_node_list_t) { 0 }),
5969 NULL,
5970 NULL
5971 );
5972}
5973
5977static void
5978pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5979 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5980 PM_NODE_START_SET_NODE(params, param);
5981 }
5982
5983 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5984 PM_NODE_LENGTH_SET_NODE(params, param);
5985 }
5986}
5987
5991static void
5992pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5993 pm_parameters_node_location_set(params, param);
5994 pm_node_list_append(arena, &params->requireds, param);
5995}
5996
6000static void
6001pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6002 pm_parameters_node_location_set(params, UP(param));
6003 pm_node_list_append(arena, &params->optionals, UP(param));
6004}
6005
6009static void
6010pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6011 pm_parameters_node_location_set(params, param);
6012 pm_node_list_append(arena, &params->posts, param);
6013}
6014
6018static void
6019pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6020 pm_parameters_node_location_set(params, param);
6021 params->rest = param;
6022}
6023
6027static void
6028pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6029 pm_parameters_node_location_set(params, param);
6030 pm_node_list_append(arena, &params->keywords, param);
6031}
6032
6036static void
6037pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6038 assert(params->keyword_rest == NULL);
6039 pm_parameters_node_location_set(params, param);
6040 params->keyword_rest = param;
6041}
6042
6046static void
6047pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
6048 assert(params->block == NULL);
6049 pm_parameters_node_location_set(params, param);
6050 params->block = param;
6051}
6052
6056static pm_program_node_t *
6057pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6058 return pm_program_node_new(
6059 parser->arena,
6060 ++parser->node_id,
6061 0,
6062 PM_LOCATION_INIT_NODE(statements),
6063 *locals,
6064 statements
6065 );
6066}
6067
6071static pm_parentheses_node_t *
6072pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6073 return pm_parentheses_node_new(
6074 parser->arena,
6075 ++parser->node_id,
6076 flags,
6077 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6078 body,
6079 TOK2LOC(parser, opening),
6080 TOK2LOC(parser, closing)
6081 );
6082}
6083
6087static pm_pinned_expression_node_t *
6088pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6089 return pm_pinned_expression_node_new(
6090 parser->arena,
6091 ++parser->node_id,
6092 0,
6093 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
6094 expression,
6095 TOK2LOC(parser, operator),
6096 TOK2LOC(parser, lparen),
6097 TOK2LOC(parser, rparen)
6098 );
6099}
6100
6104static pm_pinned_variable_node_t *
6105pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6106 return pm_pinned_variable_node_new(
6107 parser->arena,
6108 ++parser->node_id,
6109 0,
6110 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
6111 variable,
6112 TOK2LOC(parser, operator)
6113 );
6114}
6115
6119static pm_post_execution_node_t *
6120pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6121 return pm_post_execution_node_new(
6122 parser->arena,
6123 ++parser->node_id,
6124 0,
6125 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6126 statements,
6127 TOK2LOC(parser, keyword),
6128 TOK2LOC(parser, opening),
6129 TOK2LOC(parser, closing)
6130 );
6131}
6132
6136static pm_pre_execution_node_t *
6137pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6138 return pm_pre_execution_node_new(
6139 parser->arena,
6140 ++parser->node_id,
6141 0,
6142 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6143 statements,
6144 TOK2LOC(parser, keyword),
6145 TOK2LOC(parser, opening),
6146 TOK2LOC(parser, closing)
6147 );
6148}
6149
6153static pm_range_node_t *
6154pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6155 pm_assert_value_expression(parser, left);
6156 pm_assert_value_expression(parser, right);
6157 pm_node_flags_t flags = 0;
6158
6159 // Indicate that this node is an exclusive range if the operator is `...`.
6160 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6161 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6162 }
6163
6164 // Indicate that this node is a static literal (i.e., can be compiled with
6165 // a putobject in CRuby) if the left and right are implicit nil, explicit
6166 // nil, or integers.
6167 if (
6168 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6169 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6170 ) {
6171 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6172 }
6173
6174 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
6175 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
6176
6177 return pm_range_node_new(
6178 parser->arena,
6179 ++parser->node_id,
6180 flags,
6181 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6182 left,
6183 right,
6184 TOK2LOC(parser, operator)
6185 );
6186}
6187
6191static pm_redo_node_t *
6192pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6193 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6194
6195 return pm_redo_node_new(
6196 parser->arena,
6197 ++parser->node_id,
6198 0,
6199 PM_LOCATION_INIT_TOKEN(parser, token)
6200 );
6201}
6202
6207static pm_regular_expression_node_t *
6208pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6209 return pm_regular_expression_node_new(
6210 parser->arena,
6211 ++parser->node_id,
6212 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6213 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6214 TOK2LOC(parser, opening),
6215 TOK2LOC(parser, content),
6216 TOK2LOC(parser, closing),
6217 *unescaped
6218 );
6219}
6220
6224static PRISM_INLINE pm_regular_expression_node_t *
6225pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6226 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6227}
6228
6232static pm_required_parameter_node_t *
6233pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6234 return pm_required_parameter_node_new(
6235 parser->arena,
6236 ++parser->node_id,
6237 0,
6238 PM_LOCATION_INIT_TOKEN(parser, token),
6239 pm_parser_constant_id_token(parser, token)
6240 );
6241}
6242
6246static pm_rescue_modifier_node_t *
6247pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6248 return pm_rescue_modifier_node_new(
6249 parser->arena,
6250 ++parser->node_id,
6251 0,
6252 PM_LOCATION_INIT_NODES(expression, rescue_expression),
6253 expression,
6254 TOK2LOC(parser, keyword),
6255 rescue_expression
6256 );
6257}
6258
6262static pm_rescue_node_t *
6263pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6264 return pm_rescue_node_new(
6265 parser->arena,
6266 ++parser->node_id,
6267 0,
6268 PM_LOCATION_INIT_TOKEN(parser, keyword),
6269 TOK2LOC(parser, keyword),
6270 ((pm_node_list_t) { 0 }),
6271 ((pm_location_t) { 0 }),
6272 NULL,
6273 ((pm_location_t) { 0 }),
6274 NULL,
6275 NULL
6276 );
6277}
6278
6279static PRISM_INLINE void
6280pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
6281 node->operator_loc = TOK2LOC(parser, operator);
6282}
6283
6287static void
6288pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6289 node->reference = reference;
6290 PM_NODE_LENGTH_SET_NODE(node, reference);
6291}
6292
6296static void
6297pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6298 node->statements = statements;
6299 if (pm_statements_node_body_length(statements) > 0) {
6300 PM_NODE_LENGTH_SET_NODE(node, statements);
6301 }
6302}
6303
6307static void
6308pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6309 node->subsequent = subsequent;
6310 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6311}
6312
6316static void
6317pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6318 pm_node_list_append(arena, &node->exceptions, exception);
6319 PM_NODE_LENGTH_SET_NODE(node, exception);
6320}
6321
6325static pm_rest_parameter_node_t *
6326pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6327 return pm_rest_parameter_node_new(
6328 parser->arena,
6329 ++parser->node_id,
6330 0,
6331 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6332 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6333 NTOK2LOC(parser, name),
6334 TOK2LOC(parser, operator)
6335 );
6336}
6337
6341static pm_retry_node_t *
6342pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6343 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6344
6345 return pm_retry_node_new(
6346 parser->arena,
6347 ++parser->node_id,
6348 0,
6349 PM_LOCATION_INIT_TOKEN(parser, token)
6350 );
6351}
6352
6356static pm_return_node_t *
6357pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6358 return pm_return_node_new(
6359 parser->arena,
6360 ++parser->node_id,
6361 0,
6362 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6363 TOK2LOC(parser, keyword),
6364 arguments
6365 );
6366}
6367
6371static pm_self_node_t *
6372pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6373 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6374
6375 return pm_self_node_new(
6376 parser->arena,
6377 ++parser->node_id,
6378 0,
6379 PM_LOCATION_INIT_TOKEN(parser, token)
6380 );
6381}
6382
6386static pm_shareable_constant_node_t *
6387pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6388 return pm_shareable_constant_node_new(
6389 parser->arena,
6390 ++parser->node_id,
6391 (pm_node_flags_t) value,
6392 PM_LOCATION_INIT_NODE(write),
6393 write
6394 );
6395}
6396
6400static pm_singleton_class_node_t *
6401pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6402 return pm_singleton_class_node_new(
6403 parser->arena,
6404 ++parser->node_id,
6405 0,
6406 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6407 *locals,
6408 TOK2LOC(parser, class_keyword),
6409 TOK2LOC(parser, operator),
6410 expression,
6411 body,
6412 TOK2LOC(parser, end_keyword)
6413 );
6414}
6415
6419static pm_source_encoding_node_t *
6420pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6421 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6422
6423 return pm_source_encoding_node_new(
6424 parser->arena,
6425 ++parser->node_id,
6426 PM_NODE_FLAG_STATIC_LITERAL,
6427 PM_LOCATION_INIT_TOKEN(parser, token)
6428 );
6429}
6430
6434static pm_source_file_node_t*
6435pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6436 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6437
6438 pm_node_flags_t flags = 0;
6439
6440 switch (parser->frozen_string_literal) {
6441 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6442 flags |= PM_STRING_FLAGS_MUTABLE;
6443 break;
6444 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6445 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6446 break;
6447 }
6448
6449 return pm_source_file_node_new(
6450 parser->arena,
6451 ++parser->node_id,
6452 flags,
6453 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
6454 parser->filepath
6455 );
6456}
6457
6461static pm_source_line_node_t *
6462pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6463 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6464
6465 return pm_source_line_node_new(
6466 parser->arena,
6467 ++parser->node_id,
6468 PM_NODE_FLAG_STATIC_LITERAL,
6469 PM_LOCATION_INIT_TOKEN(parser, token)
6470 );
6471}
6472
6476static pm_splat_node_t *
6477pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6478 return pm_splat_node_new(
6479 parser->arena,
6480 ++parser->node_id,
6481 0,
6482 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6483 TOK2LOC(parser, operator),
6484 expression
6485 );
6486}
6487
6491static pm_statements_node_t *
6492pm_statements_node_create(pm_parser_t *parser) {
6493 return pm_statements_node_new(
6494 parser->arena,
6495 ++parser->node_id,
6496 0,
6497 PM_LOCATION_INIT_UNSET,
6498 ((pm_node_list_t) { 0 })
6499 );
6500}
6501
6505static size_t
6506pm_statements_node_body_length(pm_statements_node_t *node) {
6507 return node && node->body.size;
6508}
6509
6514static PRISM_INLINE void
6515pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6516 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6517 PM_NODE_START_SET_NODE(node, statement);
6518 }
6519
6520 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6521 PM_NODE_LENGTH_SET_NODE(node, statement);
6522 }
6523}
6524
6528static void
6529pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6530 pm_statements_node_body_update(node, statement);
6531
6532 if (node->body.size > 0) {
6533 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6534
6535 switch (PM_NODE_TYPE(previous)) {
6536 case PM_BREAK_NODE:
6537 case PM_NEXT_NODE:
6538 case PM_REDO_NODE:
6539 case PM_RETRY_NODE:
6540 case PM_RETURN_NODE:
6541 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6542 break;
6543 default:
6544 break;
6545 }
6546 }
6547
6548 pm_node_list_append(parser->arena, &node->body, statement);
6549 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6550}
6551
6555static void
6556pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6557 pm_statements_node_body_update(node, statement);
6558 pm_node_list_prepend(arena, &node->body, statement);
6559 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6560}
6561
6565static PRISM_INLINE pm_string_node_t *
6566pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6567 pm_node_flags_t flags = 0;
6568
6569 switch (parser->frozen_string_literal) {
6570 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6571 flags = PM_STRING_FLAGS_MUTABLE;
6572 break;
6573 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6574 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6575 break;
6576 }
6577
6578 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6579 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6580
6581 return pm_string_node_new(
6582 parser->arena,
6583 ++parser->node_id,
6584 flags,
6585 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6586 NTOK2LOC(parser, opening),
6587 TOK2LOC(parser, content),
6588 NTOK2LOC(parser, closing),
6589 *string
6590 );
6591}
6592
6596static pm_string_node_t *
6597pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6598 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6599}
6600
6605static pm_string_node_t *
6606pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6608 parser->current_string = PM_STRING_EMPTY;
6609 return node;
6610}
6611
6615static pm_super_node_t *
6616pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6617 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6618
6619 const pm_location_t *end = pm_arguments_end(arguments);
6620 assert(end != NULL && "unreachable");
6621
6622 return pm_super_node_new(
6623 parser->arena,
6624 ++parser->node_id,
6625 0,
6626 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6627 TOK2LOC(parser, keyword),
6628 arguments->opening_loc,
6629 arguments->arguments,
6630 arguments->closing_loc,
6631 arguments->block
6632 );
6633}
6634
6639static bool
6640pm_ascii_only_p(const pm_string_t *contents) {
6641 const size_t length = pm_string_length(contents);
6642 const uint8_t *source = pm_string_source(contents);
6643
6644 for (size_t index = 0; index < length; index++) {
6645 if (source[index] & 0x80) return false;
6646 }
6647
6648 return true;
6649}
6650
6654static void
6655parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6656 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6657 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6658
6659 if (width == 0) {
6660 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6661 break;
6662 }
6663
6664 cursor += width;
6665 }
6666}
6667
6672static void
6673parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6674 const pm_encoding_t *encoding = parser->encoding;
6675
6676 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6677 size_t width = encoding->char_width(cursor, end - cursor);
6678
6679 if (width == 0) {
6680 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6681 break;
6682 }
6683
6684 cursor += width;
6685 }
6686}
6687
6697static PRISM_INLINE pm_node_flags_t
6698parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6699 if (parser->explicit_encoding != NULL) {
6700 // A Symbol may optionally have its encoding explicitly set. This will
6701 // happen if an escape sequence results in a non-ASCII code point.
6702 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6703 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6704 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6705 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6706 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6707 } else if (validate) {
6708 parse_symbol_encoding_validate_other(parser, location, contents);
6709 }
6710 } else if (pm_ascii_only_p(contents)) {
6711 // Ruby stipulates that all source files must use an ASCII-compatible
6712 // encoding. Thus, all symbols appearing in source are eligible for
6713 // "downgrading" to US-ASCII.
6714 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6715 } else if (validate) {
6716 parse_symbol_encoding_validate_other(parser, location, contents);
6717 }
6718
6719 return 0;
6720}
6721
6726static pm_symbol_node_t *
6727pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6728 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6729 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6730
6731 return pm_symbol_node_new(
6732 parser->arena,
6733 ++parser->node_id,
6734 PM_NODE_FLAG_STATIC_LITERAL | flags,
6735 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6736 NTOK2LOC(parser, opening),
6737 NTOK2LOC(parser, value),
6738 NTOK2LOC(parser, closing),
6739 *unescaped
6740 );
6741}
6742
6746static PRISM_INLINE pm_symbol_node_t *
6747pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6748 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6749}
6750
6754static pm_symbol_node_t *
6755pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6756 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6757 parser->current_string = PM_STRING_EMPTY;
6758 return node;
6759}
6760
6764static pm_symbol_node_t *
6765pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6766 assert(token->type == PM_TOKEN_LABEL);
6767
6768 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6769 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6770 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6771
6772 assert((label.end - label.start) >= 0);
6773 pm_string_shared_init(&node->unescaped, label.start, label.end);
6774 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6775
6776 return node;
6777}
6778
6782static pm_symbol_node_t *
6783pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6784 pm_symbol_node_t *node = pm_symbol_node_new(
6785 parser->arena,
6786 ++parser->node_id,
6787 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6788 PM_LOCATION_INIT_UNSET,
6789 ((pm_location_t) { 0 }),
6790 ((pm_location_t) { 0 }),
6791 ((pm_location_t) { 0 }),
6792 ((pm_string_t) { 0 })
6793 );
6794
6795 pm_string_constant_init(&node->unescaped, content, strlen(content));
6796 return node;
6797}
6798
6802static bool
6803pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6804 const pm_location_t *location = NULL;
6805
6806 switch (PM_NODE_TYPE(node)) {
6807 case PM_SYMBOL_NODE: {
6808 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6809 if (cast->closing_loc.length > 0) {
6810 location = &cast->closing_loc;
6811 }
6812 break;
6813 }
6814 case PM_INTERPOLATED_SYMBOL_NODE: {
6815 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6816 if (cast->closing_loc.length > 0) {
6817 location = &cast->closing_loc;
6818 }
6819 break;
6820 }
6821 default:
6822 return false;
6823 }
6824
6825 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6826}
6827
6831static pm_symbol_node_t *
6832pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6833 pm_symbol_node_t *new_node = pm_symbol_node_new(
6834 parser->arena,
6835 ++parser->node_id,
6836 PM_NODE_FLAG_STATIC_LITERAL,
6837 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6838 TOK2LOC(parser, opening),
6839 node->content_loc,
6840 TOK2LOC(parser, closing),
6841 node->unescaped
6842 );
6843
6844 pm_token_t content = {
6845 .type = PM_TOKEN_IDENTIFIER,
6846 .start = parser->start + node->content_loc.start,
6847 .end = parser->start + node->content_loc.start + node->content_loc.length
6848 };
6849
6850 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6851
6852 /* The old node is arena-allocated so no explicit free is needed. */
6853 return new_node;
6854}
6855
6859static pm_string_node_t *
6860pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6861 pm_node_flags_t flags = 0;
6862
6863 switch (parser->frozen_string_literal) {
6864 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6865 flags = PM_STRING_FLAGS_MUTABLE;
6866 break;
6867 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6868 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6869 break;
6870 }
6871
6872 pm_string_node_t *new_node = pm_string_node_new(
6873 parser->arena,
6874 ++parser->node_id,
6875 flags,
6876 PM_LOCATION_INIT_NODE(node),
6877 node->opening_loc,
6878 node->value_loc,
6879 node->closing_loc,
6880 node->unescaped
6881 );
6882
6883 /* The old node is arena-allocated so no explicit free is needed. */
6884 return new_node;
6885}
6886
6890static pm_true_node_t *
6891pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6892 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6893
6894 return pm_true_node_new(
6895 parser->arena,
6896 ++parser->node_id,
6897 PM_NODE_FLAG_STATIC_LITERAL,
6898 PM_LOCATION_INIT_TOKEN(parser, token)
6899 );
6900}
6901
6905static pm_true_node_t *
6906pm_true_node_synthesized_create(pm_parser_t *parser) {
6907 return pm_true_node_new(
6908 parser->arena,
6909 ++parser->node_id,
6910 PM_NODE_FLAG_STATIC_LITERAL,
6911 PM_LOCATION_INIT_UNSET
6912 );
6913}
6914
6918static pm_undef_node_t *
6919pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6920 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6921
6922 return pm_undef_node_new(
6923 parser->arena,
6924 ++parser->node_id,
6925 0,
6926 PM_LOCATION_INIT_TOKEN(parser, token),
6927 ((pm_node_list_t) { 0 }),
6928 TOK2LOC(parser, token)
6929 );
6930}
6931
6935static void
6936pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6937 PM_NODE_LENGTH_SET_NODE(node, name);
6938 pm_node_list_append(arena, &node->names, name);
6939}
6940
6944static pm_unless_node_t *
6945pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6946 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6947 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6948
6949 return pm_unless_node_new(
6950 parser->arena,
6951 ++parser->node_id,
6952 PM_NODE_FLAG_NEWLINE,
6953 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6954 TOK2LOC(parser, keyword),
6955 predicate,
6956 NTOK2LOC(parser, then_keyword),
6957 statements,
6958 NULL,
6959 ((pm_location_t) { 0 })
6960 );
6961}
6962
6966static pm_unless_node_t *
6967pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6968 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6969
6970 pm_statements_node_t *statements = pm_statements_node_create(parser);
6971 pm_statements_node_body_append(parser, statements, statement, true);
6972
6973 return pm_unless_node_new(
6974 parser->arena,
6975 ++parser->node_id,
6976 PM_NODE_FLAG_NEWLINE,
6977 PM_LOCATION_INIT_NODES(statement, predicate),
6978 TOK2LOC(parser, unless_keyword),
6979 predicate,
6980 ((pm_location_t) { 0 }),
6981 statements,
6982 NULL,
6983 ((pm_location_t) { 0 })
6984 );
6985}
6986
6987static PRISM_INLINE void
6988pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6989 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6990 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6991}
6992
6998static void
6999pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7000 assert(parser->current_block_exits != NULL);
7001
7002 // All of the block exits that we want to remove should be within the
7003 // statements, and since we are modifying the statements, we shouldn't have
7004 // to check the end location.
7005 uint32_t start = statements->base.location.start;
7006
7007 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7008 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7009 if (block_exit->location.start < start) break;
7010
7011 // Implicitly remove from the list by lowering the size.
7012 parser->current_block_exits->size--;
7013 }
7014}
7015
7019static pm_until_node_t *
7020pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7021 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7022
7023 return pm_until_node_new(
7024 parser->arena,
7025 ++parser->node_id,
7026 flags,
7027 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7028 TOK2LOC(parser, keyword),
7029 NTOK2LOC(parser, do_keyword),
7030 TOK2LOC(parser, closing),
7031 predicate,
7032 statements
7033 );
7034}
7035
7039static pm_until_node_t *
7040pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7041 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7042 pm_loop_modifier_block_exits(parser, statements);
7043
7044 return pm_until_node_new(
7045 parser->arena,
7046 ++parser->node_id,
7047 flags,
7048 PM_LOCATION_INIT_NODES(statements, predicate),
7049 TOK2LOC(parser, keyword),
7050 ((pm_location_t) { 0 }),
7051 ((pm_location_t) { 0 }),
7052 predicate,
7053 statements
7054 );
7055}
7056
7060static pm_when_node_t *
7061pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7062 return pm_when_node_new(
7063 parser->arena,
7064 ++parser->node_id,
7065 0,
7066 PM_LOCATION_INIT_TOKEN(parser, keyword),
7067 TOK2LOC(parser, keyword),
7068 ((pm_node_list_t) { 0 }),
7069 ((pm_location_t) { 0 }),
7070 NULL
7071 );
7072}
7073
7077static void
7078pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
7079 PM_NODE_LENGTH_SET_NODE(node, condition);
7080 pm_node_list_append(arena, &node->conditions, condition);
7081}
7082
7086static PRISM_INLINE void
7087pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
7088 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
7089 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
7090}
7091
7095static void
7096pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7097 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
7098 PM_NODE_LENGTH_SET_NODE(node, statements);
7099 }
7100
7101 node->statements = statements;
7102}
7103
7107static pm_while_node_t *
7108pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7109 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7110
7111 return pm_while_node_new(
7112 parser->arena,
7113 ++parser->node_id,
7114 flags,
7115 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7116 TOK2LOC(parser, keyword),
7117 NTOK2LOC(parser, do_keyword),
7118 TOK2LOC(parser, closing),
7119 predicate,
7120 statements
7121 );
7122}
7123
7127static pm_while_node_t *
7128pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7129 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7130 pm_loop_modifier_block_exits(parser, statements);
7131
7132 return pm_while_node_new(
7133 parser->arena,
7134 ++parser->node_id,
7135 flags,
7136 PM_LOCATION_INIT_NODES(statements, predicate),
7137 TOK2LOC(parser, keyword),
7138 ((pm_location_t) { 0 }),
7139 ((pm_location_t) { 0 }),
7140 predicate,
7141 statements
7142 );
7143}
7144
7148static pm_while_node_t *
7149pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7150 return pm_while_node_new(
7151 parser->arena,
7152 ++parser->node_id,
7153 0,
7154 PM_LOCATION_INIT_UNSET,
7155 ((pm_location_t) { 0 }),
7156 ((pm_location_t) { 0 }),
7157 ((pm_location_t) { 0 }),
7158 predicate,
7159 statements
7160 );
7161}
7162
7167static pm_x_string_node_t *
7168pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7169 return pm_x_string_node_new(
7170 parser->arena,
7171 ++parser->node_id,
7172 PM_STRING_FLAGS_FROZEN,
7173 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
7174 TOK2LOC(parser, opening),
7175 TOK2LOC(parser, content),
7176 TOK2LOC(parser, closing),
7177 *unescaped
7178 );
7179}
7180
7184static PRISM_INLINE pm_x_string_node_t *
7185pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7186 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7187}
7188
7192static pm_yield_node_t *
7193pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7194 uint32_t start = PM_TOKEN_START(parser, keyword);
7195 uint32_t end;
7196
7197 if (rparen_loc->length > 0) {
7198 end = PM_LOCATION_END(rparen_loc);
7199 } else if (arguments != NULL) {
7200 end = PM_NODE_END(arguments);
7201 } else if (lparen_loc->length > 0) {
7202 end = PM_LOCATION_END(lparen_loc);
7203 } else {
7204 end = PM_TOKEN_END(parser, keyword);
7205 }
7206
7207 return pm_yield_node_new(
7208 parser->arena,
7209 ++parser->node_id,
7210 0,
7211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
7212 TOK2LOC(parser, keyword),
7213 *lparen_loc,
7214 arguments,
7215 *rparen_loc
7216 );
7217}
7218
7223static int
7224pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7225 pm_scope_t *scope = parser->current_scope;
7226 int depth = 0;
7227
7228 while (scope != NULL) {
7229 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7230 if (scope->closed) break;
7231
7232 scope = scope->previous;
7233 depth++;
7234 }
7235
7236 return -1;
7237}
7238
7244static PRISM_INLINE int
7245pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7246 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7247}
7248
7252static PRISM_INLINE void
7253pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7254 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
7255}
7256
7260static pm_constant_id_t
7261pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7262 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
7263 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7264 return constant_id;
7265}
7266
7270static PRISM_INLINE pm_constant_id_t
7271pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7272 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
7273}
7274
7278static PRISM_INLINE pm_constant_id_t
7279pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7280 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
7281}
7282
7286static pm_constant_id_t
7287pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7288 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7289 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7290 return constant_id;
7291}
7292
7296static pm_constant_id_t
7297pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7298 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7299 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7300 return constant_id;
7301}
7302
7310static bool
7311pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7312 // We want to check whether the parameter name is a numbered parameter or
7313 // not.
7314 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7315
7316 // Otherwise we'll fetch the constant id for the parameter name and check
7317 // whether it's already in the current scope.
7318 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7319
7320 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7321 // Add an error if the parameter doesn't start with _ and has been seen before
7322 if ((name->start < name->end) && (*name->start != '_')) {
7323 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7324 }
7325 return true;
7326 }
7327 return false;
7328}
7329
7333static void
7334pm_parser_scope_pop(pm_parser_t *parser) {
7335 pm_scope_t *scope = parser->current_scope;
7336 parser->current_scope = scope->previous;
7337 pm_locals_free(&scope->locals);
7338 xfree_sized(scope, sizeof(pm_scope_t));
7339}
7340
7341/******************************************************************************/
7342/* Stack helpers */
7343/******************************************************************************/
7344
7348static PRISM_INLINE void
7349pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7350 *stack = (*stack << 1) | (value & 1);
7351}
7352
7356static PRISM_INLINE void
7357pm_state_stack_pop(pm_state_stack_t *stack) {
7358 *stack >>= 1;
7359}
7360
7364static PRISM_INLINE bool
7365pm_state_stack_p(const pm_state_stack_t *stack) {
7366 return *stack & 1;
7367}
7368
7369static PRISM_INLINE void
7370pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7371 // Use the negation of the value to prevent stack overflow.
7372 pm_state_stack_push(&parser->accepts_block_stack, !value);
7373}
7374
7375static PRISM_INLINE void
7376pm_accepts_block_stack_pop(pm_parser_t *parser) {
7377 pm_state_stack_pop(&parser->accepts_block_stack);
7378}
7379
7380static PRISM_INLINE bool
7381pm_accepts_block_stack_p(pm_parser_t *parser) {
7382 return !pm_state_stack_p(&parser->accepts_block_stack);
7383}
7384
7385static PRISM_INLINE void
7386pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7387 pm_state_stack_push(&parser->do_loop_stack, value);
7388}
7389
7390static PRISM_INLINE void
7391pm_do_loop_stack_pop(pm_parser_t *parser) {
7392 pm_state_stack_pop(&parser->do_loop_stack);
7393}
7394
7395static PRISM_INLINE bool
7396pm_do_loop_stack_p(pm_parser_t *parser) {
7397 return pm_state_stack_p(&parser->do_loop_stack);
7398}
7399
7400/******************************************************************************/
7401/* Lexer check helpers */
7402/******************************************************************************/
7403
7408static PRISM_INLINE uint8_t
7409peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7410 if (cursor < parser->end) {
7411 return *cursor;
7412 } else {
7413 return '\0';
7414 }
7415}
7416
7422static PRISM_INLINE uint8_t
7423peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7424 return peek_at(parser, parser->current.end + offset);
7425}
7426
7431static PRISM_INLINE uint8_t
7432peek(const pm_parser_t *parser) {
7433 return peek_at(parser, parser->current.end);
7434}
7435
7440static PRISM_INLINE bool
7441match(pm_parser_t *parser, uint8_t value) {
7442 if (peek(parser) == value) {
7443 parser->current.end++;
7444 return true;
7445 }
7446 return false;
7447}
7448
7453static PRISM_INLINE size_t
7454match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7455 if (peek_at(parser, cursor) == '\n') {
7456 return 1;
7457 }
7458 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7459 return 2;
7460 }
7461 return 0;
7462}
7463
7469static PRISM_INLINE size_t
7470match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7471 return match_eol_at(parser, parser->current.end + offset);
7472}
7473
7479static PRISM_INLINE size_t
7480match_eol(pm_parser_t *parser) {
7481 return match_eol_at(parser, parser->current.end);
7482}
7483
7487static PRISM_INLINE const uint8_t *
7488next_newline(const uint8_t *cursor, ptrdiff_t length) {
7489 assert(length >= 0);
7490
7491 // Note that it's okay for us to use memchr here to look for \n because none
7492 // of the encodings that we support have \n as a component of a multi-byte
7493 // character.
7494 return memchr(cursor, '\n', (size_t) length);
7495}
7496
7500static PRISM_INLINE bool
7501ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7502 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7503}
7504
7509static bool
7510parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7511 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7512
7513 if (encoding != NULL) {
7514 if (parser->encoding != encoding) {
7515 parser->encoding = encoding;
7516 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7517 }
7518
7519 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7520 return true;
7521 }
7522
7523 return false;
7524}
7525
7530static void
7531parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7532 const uint8_t *cursor = parser->current.start + 1;
7533 const uint8_t *end = parser->current.end;
7534
7535 bool separator = false;
7536 while (true) {
7537 if (end - cursor <= 6) return;
7538 switch (cursor[6]) {
7539 case 'C': case 'c': cursor += 6; continue;
7540 case 'O': case 'o': cursor += 5; continue;
7541 case 'D': case 'd': cursor += 4; continue;
7542 case 'I': case 'i': cursor += 3; continue;
7543 case 'N': case 'n': cursor += 2; continue;
7544 case 'G': case 'g': cursor += 1; continue;
7545 case '=': case ':':
7546 separator = true;
7547 cursor += 6;
7548 break;
7549 default:
7550 cursor += 6;
7551 if (pm_char_is_whitespace(*cursor)) break;
7552 continue;
7553 }
7554 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7555 separator = false;
7556 }
7557
7558 while (true) {
7559 do {
7560 if (++cursor >= end) return;
7561 } while (pm_char_is_whitespace(*cursor));
7562
7563 if (separator) break;
7564 if (*cursor != '=' && *cursor != ':') return;
7565
7566 separator = true;
7567 cursor++;
7568 }
7569
7570 const uint8_t *value_start = cursor;
7571 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7572
7573 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7574 // If we were unable to parse the encoding value, then we've got an
7575 // issue because we didn't understand the encoding that the user was
7576 // trying to use. In this case we'll keep using the default encoding but
7577 // add an error to the parser to indicate an unsuccessful parse.
7578 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7579 }
7580}
7581
/**
 * The possible results of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
7587
7592static pm_magic_comment_boolean_value_t
7593parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7594 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7595 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7596 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7597 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7598 } else {
7599 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7600 }
7601}
7602
7603static PRISM_INLINE bool
7604pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
7605 return b == '\'' || b == '"' || b == ':' || b == ';';
7606}
7607
7613static PRISM_INLINE const uint8_t *
7614parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7615 // Scan for '*' as the middle character, since it is rarer than '-' in
7616 // typical comments and avoids repeated memchr calls for '-' that hit
7617 // dashes in words like "foo-bar".
7618 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7619 if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
7620 return cursor - 1;
7621 }
7622 }
7623 return NULL;
7624}
7625
7636static PRISM_INLINE bool
7637parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7638 bool result = true;
7639
7640 const uint8_t *start = parser->current.start + 1;
7641 const uint8_t *end = parser->current.end;
7642 if (end - start <= 7) return false;
7643
7644 const uint8_t *cursor;
7645 bool indicator = false;
7646
7647 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7648 start = cursor + 3;
7649
7650 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7651 end = cursor;
7652 indicator = true;
7653 } else {
7654 // If we have a start marker but not an end marker, then we cannot
7655 // have a magic comment.
7656 return false;
7657 }
7658 } else {
7659 // Non-emacs magic comments must contain a colon for `key: value`.
7660 // Reject early if there is no colon to avoid scanning the entire
7661 // comment character-by-character.
7662 if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
7663 return false;
7664 }
7665
7666 // Advance start past leading whitespace so the main loop begins
7667 // directly at the key, avoiding a redundant whitespace scan.
7668 start += pm_strspn_whitespace(start, end - start);
7669 }
7670
7671 cursor = start;
7672 while (cursor < end) {
7673 if (indicator) {
7674 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7675 }
7676
7677 const uint8_t *key_start = cursor;
7678 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7679
7680 const uint8_t *key_end = cursor;
7681 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7682 if (cursor == end) break;
7683
7684 if (*cursor == ':') {
7685 cursor++;
7686 } else {
7687 if (!indicator) return false;
7688 continue;
7689 }
7690
7691 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7692 if (cursor == end) break;
7693
7694 const uint8_t *value_start;
7695 const uint8_t *value_end;
7696
7697 if (*cursor == '"') {
7698 value_start = ++cursor;
7699 for (; cursor < end && *cursor != '"'; cursor++) {
7700 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7701 }
7702 value_end = cursor;
7703 if (cursor < end && *cursor == '"') cursor++;
7704 } else {
7705 value_start = cursor;
7706 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7707 value_end = cursor;
7708 }
7709
7710 if (indicator) {
7711 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7712 } else {
7713 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7714 if (cursor != end) return false;
7715 }
7716
7717 // Here, we need to do some processing on the key to swap out dashes for
7718 // underscores. We only need to do this if there _is_ a dash in the key.
7719 pm_string_t key;
7720 const size_t key_length = (size_t) (key_end - key_start);
7721 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7722
7723 if (dash == NULL) {
7724 pm_string_shared_init(&key, key_start, key_end);
7725 } else {
7726 uint8_t *buffer = xmalloc(key_length);
7727 if (buffer == NULL) break;
7728
7729 memcpy(buffer, key_start, key_length);
7730 buffer[dash - key_start] = '_';
7731
7732 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7733 buffer[dash - key_start] = '_';
7734 }
7735
7736 pm_string_owned_init(&key, buffer, key_length);
7737 }
7738
7739 // Finally, we can start checking the key against the list of known
7740 // magic comment keys, and potentially change state based on that.
7741 const uint8_t *key_source = pm_string_source(&key);
7742 uint32_t value_length = (uint32_t) (value_end - value_start);
7743
7744 // We only want to attempt to compare against encoding comments if it's
7745 // the first line in the file (or the second in the case of a shebang).
7746 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7747 if (
7748 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7749 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7750 ) {
7751 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7752 }
7753 }
7754
7755 if (key_length == 11) {
7756 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7757 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7758 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7759 PM_PARSER_WARN_TOKEN_FORMAT(
7760 parser,
7761 &parser->current,
7762 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7763 (int) key_length,
7764 (const char *) key_source,
7765 (int) value_length,
7766 (const char *) value_start
7767 );
7768 break;
7769 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7770 parser->warn_mismatched_indentation = false;
7771 break;
7772 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7773 parser->warn_mismatched_indentation = true;
7774 break;
7775 }
7776 }
7777 } else if (key_length == 21) {
7778 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7779 // We only want to handle frozen string literal comments if it's
7780 // before any semantic tokens have been seen.
7781 if (semantic_token_seen) {
7782 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7783 } else {
7784 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7785 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7786 PM_PARSER_WARN_TOKEN_FORMAT(
7787 parser,
7788 &parser->current,
7789 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7790 (int) key_length,
7791 (const char *) key_source,
7792 (int) value_length,
7793 (const char *) value_start
7794 );
7795 break;
7796 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7797 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
7798 break;
7799 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7800 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED;
7801 break;
7802 }
7803 }
7804 }
7805 } else if (key_length == 24) {
7806 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7807 const uint8_t *cursor = parser->current.start;
7808 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7809
7810 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7811 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7812 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7813 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7814 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7815 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7816 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7817 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7818 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7819 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7820 } else {
7821 PM_PARSER_WARN_TOKEN_FORMAT(
7822 parser,
7823 &parser->current,
7824 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7825 (int) key_length,
7826 (const char *) key_source,
7827 (int) value_length,
7828 (const char *) value_start
7829 );
7830 }
7831 }
7832 }
7833
7834 // When we're done, we want to free the string in case we had to
7835 // allocate memory for it.
7836 pm_string_cleanup(&key);
7837
7838 // Allocate a new magic comment node to append to the parser's list.
7839 pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
7840 magic_comment->node.next = NULL;
7841 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7842 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7843 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7844 }
7845
7846 return result;
7847}
7848
7849/******************************************************************************/
7850/* Context manipulations */
7851/******************************************************************************/
7852
7853static const uint32_t context_terminators[] = {
7854 [PM_CONTEXT_NONE] = 0,
7855 [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7856 [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7857 [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7858 [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7859 [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7860 [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7861 [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7862 [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7863 [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
7864 [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7865 [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7866 [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7867 [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7868 [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7869 [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7870 [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7871 [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7872 [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7873 [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7874 [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7875 [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
7876 [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
7877 [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7878 [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
7879 [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7880 [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
7881 [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
7882 [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
7883 [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7884 [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
7885 [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7886 [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7887 [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7888 [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7889 [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
7890 [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
7891 [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7892 [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7893 [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7894 [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7895 [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
7896 [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
7897 [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7898 [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7899 [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
7900 [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
7901 [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7902 [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
7903 [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
7904 [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7905 [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
7906 [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7907 [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
7908 [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
7909};
7910
7911static PRISM_INLINE bool
7912context_terminator(pm_context_t context, pm_token_t *token) {
7913 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7914}
7915
7920static pm_context_t
7921context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7922 pm_context_node_t *context_node = parser->current_context;
7923
7924 while (context_node != NULL) {
7925 if (context_terminator(context_node->context, token)) return context_node->context;
7926 context_node = context_node->prev;
7927 }
7928
7929 return PM_CONTEXT_NONE;
7930}
7931
7932static bool
7933context_push(pm_parser_t *parser, pm_context_t context) {
7934 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7935 if (context_node == NULL) return false;
7936
7937 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7938
7939 if (parser->current_context == NULL) {
7940 parser->current_context = context_node;
7941 } else {
7942 context_node->prev = parser->current_context;
7943 parser->current_context = context_node;
7944 }
7945
7946 return true;
7947}
7948
7949static void
7950context_pop(pm_parser_t *parser) {
7951 pm_context_node_t *prev = parser->current_context->prev;
7952 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7953 parser->current_context = prev;
7954}
7955
7956static bool
7957context_p(const pm_parser_t *parser, pm_context_t context) {
7958 pm_context_node_t *context_node = parser->current_context;
7959
7960 while (context_node != NULL) {
7961 if (context_node->context == context) return true;
7962 context_node = context_node->prev;
7963 }
7964
7965 return false;
7966}
7967
7968static bool
7969context_def_p(const pm_parser_t *parser) {
7970 pm_context_node_t *context_node = parser->current_context;
7971
7972 while (context_node != NULL) {
7973 switch (context_node->context) {
7974 case PM_CONTEXT_DEF:
7975 case PM_CONTEXT_DEF_PARAMS:
7976 case PM_CONTEXT_DEF_ENSURE:
7977 case PM_CONTEXT_DEF_RESCUE:
7978 case PM_CONTEXT_DEF_ELSE:
7979 return true;
7980 case PM_CONTEXT_CLASS:
7981 case PM_CONTEXT_CLASS_ENSURE:
7982 case PM_CONTEXT_CLASS_RESCUE:
7983 case PM_CONTEXT_CLASS_ELSE:
7984 case PM_CONTEXT_MODULE:
7985 case PM_CONTEXT_MODULE_ENSURE:
7986 case PM_CONTEXT_MODULE_RESCUE:
7987 case PM_CONTEXT_MODULE_ELSE:
7988 case PM_CONTEXT_SCLASS:
7989 case PM_CONTEXT_SCLASS_ENSURE:
7990 case PM_CONTEXT_SCLASS_RESCUE:
7991 case PM_CONTEXT_SCLASS_ELSE:
7992 return false;
7993 default:
7994 context_node = context_node->prev;
7995 }
7996 }
7997
7998 return false;
7999}
8000
8005static const char *
8006context_human(pm_context_t context) {
8007 switch (context) {
8008 case PM_CONTEXT_NONE:
8009 assert(false && "unreachable");
8010 return "";
8011 case PM_CONTEXT_BEGIN: return "begin statement";
8012 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8013 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8014 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
8015 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8016 case PM_CONTEXT_CASE_IN: return "'in' clause";
8017 case PM_CONTEXT_CLASS: return "class definition";
8018 case PM_CONTEXT_DEF: return "method definition";
8019 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8020 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8021 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8022 case PM_CONTEXT_ELSE:
8023 case PM_CONTEXT_BEGIN_ELSE:
8024 case PM_CONTEXT_BLOCK_ELSE:
8025 case PM_CONTEXT_CLASS_ELSE:
8026 case PM_CONTEXT_DEF_ELSE:
8027 case PM_CONTEXT_LAMBDA_ELSE:
8028 case PM_CONTEXT_MODULE_ELSE:
8029 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8030 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8031 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8032 case PM_CONTEXT_BEGIN_ENSURE:
8033 case PM_CONTEXT_BLOCK_ENSURE:
8034 case PM_CONTEXT_CLASS_ENSURE:
8035 case PM_CONTEXT_DEF_ENSURE:
8036 case PM_CONTEXT_LAMBDA_ENSURE:
8037 case PM_CONTEXT_MODULE_ENSURE:
8038 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8039 case PM_CONTEXT_FOR: return "for loop";
8040 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8041 case PM_CONTEXT_IF: return "if statement";
8042 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8043 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8044 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8045 case PM_CONTEXT_MAIN: return "top level context";
8046 case PM_CONTEXT_MODULE: return "module definition";
8047 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8048 case PM_CONTEXT_PARENS: return "parentheses";
8049 case PM_CONTEXT_POSTEXE: return "'END' block";
8050 case PM_CONTEXT_PREDICATE: return "predicate";
8051 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8052 case PM_CONTEXT_BEGIN_RESCUE:
8053 case PM_CONTEXT_BLOCK_RESCUE:
8054 case PM_CONTEXT_CLASS_RESCUE:
8055 case PM_CONTEXT_DEF_RESCUE:
8056 case PM_CONTEXT_LAMBDA_RESCUE:
8057 case PM_CONTEXT_MODULE_RESCUE:
8058 case PM_CONTEXT_RESCUE_MODIFIER:
8059 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8060 case PM_CONTEXT_SCLASS: return "singleton class definition";
8061 case PM_CONTEXT_TERNARY: return "ternary expression";
8062 case PM_CONTEXT_UNLESS: return "unless statement";
8063 case PM_CONTEXT_UNTIL: return "until statement";
8064 case PM_CONTEXT_WHILE: return "while statement";
8065 }
8066
8067 assert(false && "unreachable");
8068 return "";
8069}
8070
8071/******************************************************************************/
8072/* Specific token lexers */
8073/******************************************************************************/
8074
8075static PRISM_INLINE void
8076pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8077 if (invalid != NULL) {
8078 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8079 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
8080 }
8081}
8082
8083static size_t
8084pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8085 const uint8_t *invalid = NULL;
8086 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8087 pm_strspn_number_validate(parser, string, length, invalid);
8088 return length;
8089}
8090
8091static size_t
8092pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8093 const uint8_t *invalid = NULL;
8094 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8095 pm_strspn_number_validate(parser, string, length, invalid);
8096 return length;
8097}
8098
8099static size_t
8100pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8101 const uint8_t *invalid = NULL;
8102 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8103 pm_strspn_number_validate(parser, string, length, invalid);
8104 return length;
8105}
8106
8107static size_t
8108pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8109 const uint8_t *invalid = NULL;
8110 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8111 pm_strspn_number_validate(parser, string, length, invalid);
8112 return length;
8113}
8114
8115static pm_token_type_t
8116lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8117 pm_token_type_t type = PM_TOKEN_INTEGER;
8118
8119 // Here we're going to attempt to parse the optional decimal portion of a
8120 // float. If it's not there, then it's okay and we'll just continue on.
8121 if (peek(parser) == '.') {
8122 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8123 parser->current.end += 2;
8124 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8125 type = PM_TOKEN_FLOAT;
8126 } else {
8127 // If we had a . and then something else, then it's not a float
8128 // suffix on a number it's a method call or something else.
8129 return type;
8130 }
8131 }
8132
8133 // Here we're going to attempt to parse the optional exponent portion of a
8134 // float. If it's not there, it's okay and we'll just continue on.
8135 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8136 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8137 parser->current.end += 2;
8138
8139 if (pm_char_is_decimal_digit(peek(parser))) {
8140 parser->current.end++;
8141 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8142 } else {
8143 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8144 }
8145 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8146 parser->current.end++;
8147 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8148 } else {
8149 return type;
8150 }
8151
8152 *seen_e = true;
8153 type = PM_TOKEN_FLOAT;
8154 }
8155
8156 return type;
8157}
8158
8159static pm_token_type_t
8160lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8161 pm_token_type_t type = PM_TOKEN_INTEGER;
8162 *seen_e = false;
8163
8164 if (peek_offset(parser, -1) == '0') {
8165 switch (*parser->current.end) {
8166 // 0d1111 is a decimal number
8167 case 'd':
8168 case 'D':
8169 parser->current.end++;
8170 if (pm_char_is_decimal_digit(peek(parser))) {
8171 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8172 } else {
8173 match(parser, '_');
8174 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8175 }
8176
8177 break;
8178
8179 // 0b1111 is a binary number
8180 case 'b':
8181 case 'B':
8182 parser->current.end++;
8183 if (pm_char_is_binary_digit(peek(parser))) {
8184 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8185 } else {
8186 match(parser, '_');
8187 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8188 }
8189
8190 parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
8191 break;
8192
8193 // 0o1111 is an octal number
8194 case 'o':
8195 case 'O':
8196 parser->current.end++;
8197 if (pm_char_is_octal_digit(peek(parser))) {
8198 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8199 } else {
8200 match(parser, '_');
8201 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8202 }
8203
8204 parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
8205 break;
8206
8207 // 01111 is an octal number
8208 case '_':
8209 case '0':
8210 case '1':
8211 case '2':
8212 case '3':
8213 case '4':
8214 case '5':
8215 case '6':
8216 case '7':
8217 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8218 parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
8219 break;
8220
8221 // 0x1111 is a hexadecimal number
8222 case 'x':
8223 case 'X':
8224 parser->current.end++;
8225 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8226 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8227 } else {
8228 match(parser, '_');
8229 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8230 }
8231
8232 parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8233 break;
8234
8235 // 0.xxx is a float
8236 case '.': {
8237 type = lex_optional_float_suffix(parser, seen_e);
8238 break;
8239 }
8240
8241 // 0exxx is a float
8242 case 'e':
8243 case 'E': {
8244 type = lex_optional_float_suffix(parser, seen_e);
8245 break;
8246 }
8247 }
8248 } else {
8249 // If it didn't start with a 0, then we'll lex as far as we can into a
8250 // decimal number. We compute the integer value inline to avoid
8251 // re-scanning the digits later in pm_integer_parse.
8252 {
8253 const uint8_t *cursor = parser->current.end;
8254 const uint8_t *end = parser->end;
8255 uint64_t value = (uint64_t) (cursor[-1] - '0');
8256
8257 bool has_underscore = false;
8258 bool prev_underscore = false;
8259 const uint8_t *invalid = NULL;
8260
8261 while (cursor < end) {
8262 uint8_t c = *cursor;
8263 if (c >= '0' && c <= '9') {
8264 if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
8265 prev_underscore = false;
8266 cursor++;
8267 } else if (c == '_') {
8268 has_underscore = true;
8269 if (prev_underscore && invalid == NULL) invalid = cursor;
8270 prev_underscore = true;
8271 cursor++;
8272 } else {
8273 break;
8274 }
8275 }
8276
8277 if (has_underscore) {
8278 if (prev_underscore && invalid == NULL) invalid = cursor - 1;
8279 pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
8280 }
8281
8282 if (value <= UINT32_MAX) {
8283 parser->integer.value = (uint32_t) value;
8284 parser->integer.lexed = true;
8285 }
8286
8287 parser->current.end = cursor;
8288 }
8289
8290 // Afterward, we'll lex as far as we can into an optional float suffix.
8291 // Guard the function call: the vast majority of decimal numbers are
8292 // plain integers, so avoid the call when the next byte cannot start a
8293 // float suffix.
8294 {
8295 uint8_t next = peek(parser);
8296 if (next == '.' || next == 'e' || next == 'E') {
8297 type = lex_optional_float_suffix(parser, seen_e);
8298
8299 // If it turned out to be a float, the cached integer value is
8300 // invalid.
8301 if (type != PM_TOKEN_INTEGER) {
8302 parser->integer.lexed = false;
8303 }
8304 }
8305 }
8306 }
8307
8308 // At this point we have a completed number, but we want to provide the user
8309 // with a good experience if they put an additional .xxx fractional
8310 // component on the end, so we'll check for that here.
8311 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8312 const uint8_t *fraction_start = parser->current.end;
8313 const uint8_t *fraction_end = parser->current.end + 2;
8314 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8315 pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
8316 }
8317
8318 return type;
8319}
8320
8321static pm_token_type_t
8322lex_numeric(pm_parser_t *parser) {
8323 pm_token_type_t type = PM_TOKEN_INTEGER;
8324 parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8325 parser->integer.lexed = false;
8326
8327 if (parser->current.end < parser->end) {
8328 bool seen_e = false;
8329 type = lex_numeric_prefix(parser, &seen_e);
8330
8331 const uint8_t *end = parser->current.end;
8332 pm_token_type_t suffix_type = type;
8333
8334 if (type == PM_TOKEN_INTEGER) {
8335 if (match(parser, 'r')) {
8336 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8337
8338 if (match(parser, 'i')) {
8339 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8340 }
8341 } else if (match(parser, 'i')) {
8342 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8343 }
8344 } else {
8345 if (!seen_e && match(parser, 'r')) {
8346 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8347
8348 if (match(parser, 'i')) {
8349 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8350 }
8351 } else if (match(parser, 'i')) {
8352 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8353 }
8354 }
8355
8356 const uint8_t b = peek(parser);
8357 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8358 parser->current.end = end;
8359 } else {
8360 type = suffix_type;
8361 }
8362 }
8363
8364 return type;
8365}
8366
8367static pm_token_type_t
8368lex_global_variable(pm_parser_t *parser) {
8369 if (parser->current.end >= parser->end) {
8370 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8371 return PM_TOKEN_GLOBAL_VARIABLE;
8372 }
8373
8374 // True if multiple characters are allowed after the declaration of the
8375 // global variable. Not true when it starts with "$-".
8376 bool allow_multiple = true;
8377
8378 switch (*parser->current.end) {
8379 case '~': // $~: match-data
8380 case '*': // $*: argv
8381 case '$': // $$: pid
8382 case '?': // $?: last status
8383 case '!': // $!: error string
8384 case '@': // $@: error position
8385 case '/': // $/: input record separator
8386 case '\\': // $\: output record separator
8387 case ';': // $;: field separator
8388 case ',': // $,: output field separator
8389 case '.': // $.: last read line number
8390 case '=': // $=: ignorecase
8391 case ':': // $:: load path
8392 case '<': // $<: reading filename
8393 case '>': // $>: default output handle
8394 case '\"': // $": already loaded files
8395 parser->current.end++;
8396 return PM_TOKEN_GLOBAL_VARIABLE;
8397
8398 case '&': // $&: last match
8399 case '`': // $`: string before last match
8400 case '\'': // $': string after last match
8401 case '+': // $+: string matches last paren.
8402 parser->current.end++;
8403 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8404
8405 case '0': {
8406 parser->current.end++;
8407 size_t width;
8408
8409 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8410 do {
8411 parser->current.end += width;
8412 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8413
8414 // $0 isn't allowed to be followed by anything.
8415 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8416 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8417 }
8418
8419 return PM_TOKEN_GLOBAL_VARIABLE;
8420 }
8421
8422 case '1':
8423 case '2':
8424 case '3':
8425 case '4':
8426 case '5':
8427 case '6':
8428 case '7':
8429 case '8':
8430 case '9':
8431 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8432 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8433
8434 case '-':
8435 parser->current.end++;
8436 allow_multiple = false;
8438 default: {
8439 size_t width;
8440
8441 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8442 do {
8443 parser->current.end += width;
8444 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8445 } else if (pm_char_is_whitespace(peek(parser))) {
8446 // If we get here, then we have a $ followed by whitespace,
8447 // which is not allowed.
8448 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8449 } else {
8450 // If we get here, then we have a $ followed by something that
8451 // isn't recognized as a global variable.
8452 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8453 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8454 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8455 }
8456
8457 return PM_TOKEN_GLOBAL_VARIABLE;
8458 }
8459 }
8460}
8461
8474static PRISM_INLINE pm_token_type_t
8475lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8476 if (memcmp(current_start, value, vlen) == 0) {
8477 pm_lex_state_t last_state = parser->lex_state;
8478
8479 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8480 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8481 } else {
8482 lex_state_set(parser, state);
8483 if (state == PM_LEX_STATE_BEG) {
8484 parser->command_start = true;
8485 }
8486
8487 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8488 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8489 return modifier_type;
8490 }
8491 }
8492
8493 return type;
8494 }
8495
8496 return PM_TOKEN_EOF;
8497}
8498
/**
 * Lex the identifier-like token that begins at parser->current.start.
 *
 * The function first advances parser->current.end over every identifier
 * character, then looks at what follows to decide on the token type:
 *
 * - a trailing `!` or `?` (not followed by `=`) is folded into the token, which
 *   then becomes PM_TOKEN_LABEL (when a single `:` follows in a label-accepting
 *   state), PM_TOKEN_KEYWORD_DEFINED, or PM_TOKEN_METHOD_NAME;
 * - in an FNAME lex state a trailing `=` may be folded in (subject to the
 *   lookahead checks below), yielding PM_TOKEN_IDENTIFIER;
 * - a trailing single `:` in a label-accepting state yields PM_TOKEN_LABEL;
 * - otherwise the text is compared against the keyword list (unless the lex
 *   state is exactly PM_LEX_STATE_DOT), and lex_keyword supplies both the
 *   keyword token type and the lex state change;
 * - anything else is PM_TOKEN_CONSTANT or PM_TOKEN_IDENTIFIER depending on the
 *   encoding's isupper_char check at the start of the token.
 *
 * @param parser The parser whose current token is being lexed.
 * @param previous_command_start When true, the LABEL/ENDFN lex states alone do
 *     not permit a label; only lex_state_arg_p(parser) does.
 * @return The type of the token that was lexed.
 */
8499static pm_token_type_t
8500lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8501 // Lex as far as we can into the current identifier.
8502 size_t width;
8503 const uint8_t *end = parser->end;
8504 const uint8_t *current_start = parser->current.start;
8505 const uint8_t *current_end = parser->current.end;
8506 bool encoding_changed = parser->encoding_changed;
8507
8508 if (encoding_changed) {
8509 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8510 current_end += width;
8511 }
8512 } else {
8513 // Fast path: scan ASCII identifier bytes using wide operations.
8514 current_end += scan_identifier_ascii(current_end, end);
8515
8516 // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
8517 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8518 current_end += width;
8519 }
8520 }
8521 parser->current.end = current_end;
8522
8523 // Now cache the length of the identifier so that we can quickly compare it
8524 // against known keywords.
8525 width = (size_t) (current_end - current_start);
8526
 // Suffix handling (!, ?, =, :) only applies when at least one byte follows
 // the identifier.
8527 if (current_end < end) {
 // current_end[1] is the byte after a potential ! or ?. If it is `=`, the
 // suffix is left alone and is not folded into the identifier.
8528 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8529 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8530 // check if we're returning the defined? keyword or just an identifier.
8531 width++;
8532
8533 if (
8534 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8535 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8536 ) {
8537 // If we're in a position where we can accept a : at the end of an
8538 // identifier, then we'll optionally accept it.
8539 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8540 (void) match(parser, ':');
8541 return PM_TOKEN_LABEL;
8542 }
8543
 // `defined?` is the only keyword checked on this path; it is skipped
 // when the lex state is exactly PM_LEX_STATE_DOT.
8544 if (parser->lex_state != PM_LEX_STATE_DOT) {
8545 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8546 return PM_TOKEN_KEYWORD_DEFINED;
8547 }
8548 }
8549
8550 return PM_TOKEN_METHOD_NAME;
8551 }
8552
 // In an FNAME state, fold a trailing = into the name unless the lookahead
 // is `=~`, `=>`, or `==` (where `==>` is still accepted).
8553 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8554 // If we're in a position where we can accept a = at the end of an
8555 // identifier, then we'll optionally accept it.
8556 return PM_TOKEN_IDENTIFIER;
8557 }
8558
8559 if (
8560 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8561 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8562 ) {
8563 // If we're in a position where we can accept a : at the end of an
8564 // identifier, then we'll optionally accept it.
8565 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8566 (void) match(parser, ':');
8567 return PM_TOKEN_LABEL;
8568 }
8569 }
8570
 // Keyword lookup, bucketed by token width so that only keywords of the same
 // length are compared. lex_keyword returns PM_TOKEN_EOF when the text does
 // not match, and otherwise updates the lex state itself. No keyword is
 // recognized when the lex state is exactly PM_LEX_STATE_DOT.
8571 if (parser->lex_state != PM_LEX_STATE_DOT) {
8572 pm_token_type_t type;
8573 switch (width) {
8574 case 2:
 // `do` maps to one of three token types. The checks run in this
 // order: lambda enclosure nesting, do-loop stack, accepts-block stack.
8575 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8576 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8577 return PM_TOKEN_KEYWORD_DO;
8578 }
8579 if (pm_do_loop_stack_p(parser)) {
8580 return PM_TOKEN_KEYWORD_DO_LOOP;
8581 }
8582 if (!pm_accepts_block_stack_p(parser)) {
8583 return PM_TOKEN_KEYWORD_DO_BLOCK;
8584 }
8585 return PM_TOKEN_KEYWORD_DO;
8586 }
8587
8588 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8589 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8590 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8591 break;
8592 case 3:
8593 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8594 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8595 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8596 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8597 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8598 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8599 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8600 break;
8601 case 4:
8602 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8603 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8604 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8605 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8606 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8607 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8608 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8609 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8610 break;
8611 case 5:
8612 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8613 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8614 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8615 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8616 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8617 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8618 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8619 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8620 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8621 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8622 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8623 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8624 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8625 break;
8626 case 6:
8627 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8628 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8629 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8630 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8631 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8632 break;
8633 case 8:
8634 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8635 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8636 break;
8637 case 12:
8638 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8639 break;
8640 }
8641 }
8642
 // Not a keyword: the token is a constant when the encoding's isupper_char
 // check succeeds at its start, and a plain identifier otherwise. The UTF-8
 // routine is called directly unless the parser's encoding was changed.
8643 if (encoding_changed) {
8644 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8645 }
8646 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8647}
8648
8653static bool
8654current_token_starts_line(pm_parser_t *parser) {
8655 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8656}
8657
8672static pm_token_type_t
8673lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8674 // If there is no content following this #, then we're at the end of
8675 // the string and we can safely return string content.
8676 if (pound + 1 >= parser->end) {
8677 parser->current.end = pound + 1;
8678 return PM_TOKEN_STRING_CONTENT;
8679 }
8680
8681 // Now we'll check against the character that follows the #. If it
8682 // constitutes valid interplation, we'll handle that, otherwise we'll return
8683 // 0.
8684 switch (pound[1]) {
8685 case '@': {
8686 // In this case we may have hit an embedded instance or class variable.
8687 if (pound + 2 >= parser->end) {
8688 parser->current.end = pound + 1;
8689 return PM_TOKEN_STRING_CONTENT;
8690 }
8691
8692 // If we're looking at a @ and there's another @, then we'll skip past the
8693 // second @.
8694 const uint8_t *variable = pound + 2;
8695 if (*variable == '@' && pound + 3 < parser->end) variable++;
8696
8697 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8698 // At this point we're sure that we've either hit an embedded instance
8699 // or class variable. In this case we'll first need to check if we've
8700 // already consumed content.
8701 if (pound > parser->current.start) {
8702 parser->current.end = pound;
8703 return PM_TOKEN_STRING_CONTENT;
8704 }
8705
8706 // Otherwise we need to return the embedded variable token
8707 // and then switch to the embedded variable lex mode.
8708 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8709 parser->current.end = pound + 1;
8710 return PM_TOKEN_EMBVAR;
8711 }
8712
8713 // If we didn't get a valid interpolation, then this is just regular
8714 // string content. This is like if we get "#@-". In this case the caller
8715 // should keep lexing.
8716 parser->current.end = pound + 1;
8717 return 0;
8718 }
8719 case '$':
8720 // In this case we may have hit an embedded global variable. If there's
8721 // not enough room, then we'll just return string content.
8722 if (pound + 2 >= parser->end) {
8723 parser->current.end = pound + 1;
8724 return PM_TOKEN_STRING_CONTENT;
8725 }
8726
8727 // This is the character that we're going to check to see if it is the
8728 // start of an identifier that would indicate that this is a global
8729 // variable.
8730 const uint8_t *check = pound + 2;
8731
8732 if (pound[2] == '-') {
8733 if (pound + 3 >= parser->end) {
8734 parser->current.end = pound + 2;
8735 return PM_TOKEN_STRING_CONTENT;
8736 }
8737
8738 check++;
8739 }
8740
8741 // If the character that we're going to check is the start of an
8742 // identifier, or we don't have a - and the character is a decimal number
8743 // or a global name punctuation character, then we've hit an embedded
8744 // global variable.
8745 if (
8746 char_is_identifier_start(parser, check, parser->end - check) ||
8747 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8748 ) {
8749 // In this case we've hit an embedded global variable. First check to
8750 // see if we've already consumed content. If we have, then we need to
8751 // return that content as string content first.
8752 if (pound > parser->current.start) {
8753 parser->current.end = pound;
8754 return PM_TOKEN_STRING_CONTENT;
8755 }
8756
8757 // Otherwise, we need to return the embedded variable token and switch
8758 // to the embedded variable lex mode.
8759 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8760 parser->current.end = pound + 1;
8761 return PM_TOKEN_EMBVAR;
8762 }
8763
8764 // In this case we've hit a #$ that does not indicate a global variable.
8765 // In this case we'll continue lexing past it.
8766 parser->current.end = pound + 1;
8767 return 0;
8768 case '{':
8769 // In this case it's the start of an embedded expression. If we have
8770 // already consumed content, then we need to return that content as string
8771 // content first.
8772 if (pound > parser->current.start) {
8773 parser->current.end = pound;
8774 return PM_TOKEN_STRING_CONTENT;
8775 }
8776
8777 parser->enclosure_nesting++;
8778
8779 // Otherwise we'll skip past the #{ and begin lexing the embedded
8780 // expression.
8781 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8782 parser->current.end = pound + 2;
8783 parser->command_start = true;
8784 pm_do_loop_stack_push(parser, false);
8785 return PM_TOKEN_EMBEXPR_BEGIN;
8786 default:
8787 // In this case we've hit a # that doesn't constitute interpolation. We'll
8788 // mark that by returning the not provided token type. This tells the
8789 // consumer to keep lexing forward.
8790 parser->current.end = pound + 1;
8791 return 0;
8792 }
8793}
8794
/** No escape flags are set. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** The escape is nested inside a control escape (\c or \C-). */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1 << 0;

/** The escape has its meta bit (0x80) set when converted to a byte. */
static const uint8_t PM_ESCAPE_FLAG_META = 1 << 1;

/** The escape is being read as part of a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1 << 2;

/** The escape is being read inside a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1 << 3;
8800
/**
 * Lookup table indexed by ASCII byte value (0x00-0x7F). An entry is 1 for the
 * bytes 0x09-0x0D (tab through carriage return) and for 0x20-0x7E with the
 * exception of the backslash (0x5C); every other entry is 0.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 */ 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x28 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x38 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x48 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x58 */ 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x68 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x78 */ 1, 1, 1, 1, 1, 1, 1, 0
};
8814
8815static PRISM_INLINE bool
8816char_is_ascii_printable(const uint8_t b) {
8817 return (b < 0x80) && ascii_printable_chars[b];
8818}
8819
8824static PRISM_INLINE uint8_t
8825escape_hexadecimal_digit(const uint8_t value) {
8826 return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
8827}
8828
8834static PRISM_INLINE uint32_t
8835escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
8836 uint32_t value = 0;
8837 for (size_t index = 0; index < length; index++) {
8838 if (index != 0) value <<= 4;
8839 value |= escape_hexadecimal_digit(string[index]);
8840 }
8841
8842 // Here we're going to verify that the value is actually a valid Unicode
8843 // codepoint and not a surrogate pair.
8844 if (value >= 0xD800 && value <= 0xDFFF) {
8845 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8846 // In regexp context, defer the error to regexp encoding
8847 // validation where we can produce a regexp-specific message.
8848 } else if (error_location != NULL) {
8849 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8850 } else {
8851 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8852 }
8853 return 0xFFFD;
8854 }
8855
8856 return value;
8857}
8858
8862static PRISM_INLINE uint8_t
8863escape_byte(uint8_t value, const uint8_t flags) {
8864 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8865 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8866 return value;
8867}
8868
8872static PRISM_INLINE void
8873escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8874 // \u escape sequences in string-like structures implicitly change the
8875 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8876 // literal.
8877 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8878 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8879 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8880 // In regexp context, suppress this error — the regexp encoding
8881 // validation will produce a more specific error message.
8882 } else {
8883 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8884 }
8885 }
8886
8887 parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
8888 }
8889
8890 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8891 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8892 // In regexp context, defer the error to the regexp encoding
8893 // validation which produces a regexp-specific message.
8894 } else {
8895 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8896 }
8897
8898 pm_buffer_append_byte(buffer, 0xEF);
8899 pm_buffer_append_byte(buffer, 0xBF);
8900 pm_buffer_append_byte(buffer, 0xBD);
8901 }
8902}
8903
8908static PRISM_INLINE void
8909escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
8910 if (byte >= 0x80) {
8911 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8912 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8913 // In regexp context, suppress this error — the regexp encoding
8914 // validation will produce a more specific error message.
8915 } else {
8916 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8917 }
8918 }
8919
8920 parser->explicit_encoding = parser->encoding;
8921 }
8922
8923 pm_buffer_append_byte(buffer, byte);
8924}
8925
8941static PRISM_INLINE void
8942escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8945 }
8946
8947 escape_write_byte_encoded(parser, buffer, flags, byte);
8948}
8949
8953static PRISM_INLINE void
8954escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8955 size_t width;
8956 if (parser->encoding_changed) {
8957 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8958 } else {
8959 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8960 }
8961
8962 if (width == 1) {
8963 if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8964 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8965 } else if (width > 1) {
8966 // Valid multibyte character. Just ignore escape.
8967 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8968 pm_buffer_append_bytes(b, parser->current.end, width);
8969 parser->current.end += width;
8970 } else {
8971 // Assume the next character wasn't meant to be part of this escape
8972 // sequence since it is invalid. Add an error and move on.
8973 parser->current.end++;
8974 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8975 }
8976}
8977
8983static void
8984escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8985#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8986
8987 PM_PARSER_WARN_TOKEN_FORMAT(
8988 parser,
8989 &parser->current,
8990 PM_WARN_INVALID_CHARACTER,
8991 FLAG(flags),
8992 FLAG(flag),
8993 type
8994 );
8995
8996#undef FLAG
8997}
8998
9002static void
9003escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9004 uint8_t peeked = peek(parser);
9005 switch (peeked) {
9006 case '\\': {
9007 parser->current.end++;
9008 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9009 return;
9010 }
9011 case '\'': {
9012 parser->current.end++;
9013 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9014 return;
9015 }
9016 case 'a': {
9017 parser->current.end++;
9018 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9019 return;
9020 }
9021 case 'b': {
9022 parser->current.end++;
9023 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9024 return;
9025 }
9026 case 'e': {
9027 parser->current.end++;
9028 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9029 return;
9030 }
9031 case 'f': {
9032 parser->current.end++;
9033 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9034 return;
9035 }
9036 case 'n': {
9037 parser->current.end++;
9038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9039 return;
9040 }
9041 case 'r': {
9042 parser->current.end++;
9043 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9044 return;
9045 }
9046 case 's': {
9047 parser->current.end++;
9048 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9049 return;
9050 }
9051 case 't': {
9052 parser->current.end++;
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9054 return;
9055 }
9056 case 'v': {
9057 parser->current.end++;
9058 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9059 return;
9060 }
9061 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9062 uint8_t value = (uint8_t) (*parser->current.end - '0');
9063 parser->current.end++;
9064
9065 if (pm_char_is_octal_digit(peek(parser))) {
9066 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9067 parser->current.end++;
9068
9069 if (pm_char_is_octal_digit(peek(parser))) {
9070 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9071 parser->current.end++;
9072 }
9073 }
9074
9075 value = escape_byte(value, flags);
9076 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9077 return;
9078 }
9079 case 'x': {
9080 const uint8_t *start = parser->current.end - 1;
9081
9082 parser->current.end++;
9083 uint8_t byte = peek(parser);
9084
9085 if (pm_char_is_hexadecimal_digit(byte)) {
9086 uint8_t value = escape_hexadecimal_digit(byte);
9087 parser->current.end++;
9088
9089 byte = peek(parser);
9090 if (pm_char_is_hexadecimal_digit(byte)) {
9091 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9092 parser->current.end++;
9093 }
9094
9095 value = escape_byte(value, flags);
9096 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9097 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9098 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9099 } else {
9100 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9101 }
9102 }
9103
9104 escape_write_byte_encoded(parser, buffer, flags, value);
9105 } else {
9106 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9107 }
9108
9109 return;
9110 }
9111 case 'u': {
9112 const uint8_t *start = parser->current.end - 1;
9113 parser->current.end++;
9114
9115 if (parser->current.end == parser->end) {
9116 const uint8_t *start = parser->current.end - 2;
9117 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9118 } else if (peek(parser) == '{') {
9119 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9120 parser->current.end++;
9121
9122 size_t whitespace;
9123 while (true) {
9124 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9125 parser->current.end += whitespace;
9126 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9127 // This is super hacky, but it gets us nicer error
9128 // messages because we can still pass it off to the
9129 // regular expression engine even if we hit an
9130 // unterminated regular expression.
9131 parser->current.end += 2;
9132 } else {
9133 break;
9134 }
9135 }
9136
9137 const uint8_t *extra_codepoints_start = NULL;
9138 int codepoints_count = 0;
9139
9140 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9141 const uint8_t *unicode_start = parser->current.end;
9142 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9143
9144 if (hexadecimal_length > 6) {
9145 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9146 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9147 } else if (hexadecimal_length == 0) {
9148 // there are not hexadecimal characters
9149
9150 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9151 // If this is a regular expression, we are going to
9152 // let the regular expression engine handle this
9153 // error instead of us because we don't know at this
9154 // point if we're inside a comment in /x mode.
9155 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9156 } else {
9157 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
9158 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9159 }
9160
9161 return;
9162 }
9163
9164 parser->current.end += hexadecimal_length;
9165 codepoints_count++;
9166 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9167 extra_codepoints_start = unicode_start;
9168 }
9169
9170 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
9171 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9172
9173 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9174 }
9175
9176 // ?\u{nnnn} character literal should contain only one codepoint
9177 // and cannot be like ?\u{nnnn mmmm}.
9178 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9179 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9180 }
9181
9182 if (parser->current.end == parser->end) {
9183 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9184 } else if (peek(parser) == '}') {
9185 parser->current.end++;
9186 } else {
9187 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9188 // If this is a regular expression, we are going to let
9189 // the regular expression engine handle this error
9190 // instead of us because we don't know at this point if
9191 // we're inside a comment in /x mode.
9192 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9193 } else {
9194 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9195 }
9196 }
9197
9198 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9199 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9200 }
9201 } else {
9202 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9203
9204 if (length == 0) {
9205 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9206 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9207 } else {
9208 const uint8_t *start = parser->current.end - 2;
9209 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9210 }
9211 } else if (length == 4) {
9212 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
9213
9214 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9215 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9216 }
9217
9218 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9219 parser->current.end += 4;
9220 } else {
9221 parser->current.end += length;
9222
9223 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9224 // If this is a regular expression, we are going to let
9225 // the regular expression engine handle this error
9226 // instead of us.
9227 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9228 } else {
9229 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9230 }
9231 }
9232 }
9233
9234 return;
9235 }
9236 case 'c': {
9237 parser->current.end++;
9238 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9239 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9240 }
9241
9242 if (parser->current.end == parser->end) {
9243 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9244 return;
9245 }
9246
9247 uint8_t peeked = peek(parser);
9248 switch (peeked) {
9249 case '?': {
9250 parser->current.end++;
9251 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9252 return;
9253 }
9254 case '\\':
9255 parser->current.end++;
9256
9257 if (match(parser, 'u') || match(parser, 'U')) {
9258 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9259 return;
9260 }
9261
9262 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9263 return;
9264 case ' ':
9265 parser->current.end++;
9266 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9267 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9268 return;
9269 case '\t':
9270 parser->current.end++;
9271 escape_read_warn(parser, flags, 0, "\\t");
9272 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9273 return;
9274 default: {
9275 if (!char_is_ascii_printable(peeked)) {
9276 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9277 return;
9278 }
9279
9280 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9281 parser->current.end++;
9282 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9283 return;
9284 }
9285 }
9286 }
9287 case 'C': {
9288 parser->current.end++;
9289 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9290 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9291 }
9292
9293 if (peek(parser) != '-') {
9294 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9295 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9296 return;
9297 }
9298
9299 parser->current.end++;
9300 if (parser->current.end == parser->end) {
9301 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9302 return;
9303 }
9304
9305 uint8_t peeked = peek(parser);
9306 switch (peeked) {
9307 case '?': {
9308 parser->current.end++;
9309 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9310 return;
9311 }
9312 case '\\':
9313 parser->current.end++;
9314
9315 if (match(parser, 'u') || match(parser, 'U')) {
9316 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9317 return;
9318 }
9319
9320 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9321 return;
9322 case ' ':
9323 parser->current.end++;
9324 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9325 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9326 return;
9327 case '\t':
9328 parser->current.end++;
9329 escape_read_warn(parser, flags, 0, "\\t");
9330 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9331 return;
9332 default: {
9333 if (!char_is_ascii_printable(peeked)) {
9334 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9335 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9336 return;
9337 }
9338
9339 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9340 parser->current.end++;
9341 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9342 return;
9343 }
9344 }
9345 }
9346 case 'M': {
9347 parser->current.end++;
9348 if (flags & PM_ESCAPE_FLAG_META) {
9349 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9350 }
9351
9352 if (peek(parser) != '-') {
9353 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9354 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9355 return;
9356 }
9357
9358 parser->current.end++;
9359 if (parser->current.end == parser->end) {
9360 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9361 return;
9362 }
9363
9364 uint8_t peeked = peek(parser);
9365 switch (peeked) {
9366 case '\\':
9367 parser->current.end++;
9368
9369 if (match(parser, 'u') || match(parser, 'U')) {
9370 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9371 return;
9372 }
9373
9374 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9375 return;
9376 case ' ':
9377 parser->current.end++;
9378 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9379 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9380 return;
9381 case '\t':
9382 parser->current.end++;
9383 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9384 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9385 return;
9386 default:
9387 if (!char_is_ascii_printable(peeked)) {
9388 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9389 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9390 return;
9391 }
9392
9393 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9394 parser->current.end++;
9395 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9396 return;
9397 }
9398 }
9399 case '\r': {
9400 if (peek_offset(parser, 1) == '\n') {
9401 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
9402 parser->current.end += 2;
9403 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
9404 return;
9405 }
9407 }
9408 default: {
9409 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9410 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9411 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9412 return;
9413 }
9414 if (parser->current.end < parser->end) {
9415 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9416 } else {
9417 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9418 }
9419 return;
9420 }
9421 }
9422}
9423
9449static pm_token_type_t
9450lex_question_mark(pm_parser_t *parser) {
9451 if (lex_state_end_p(parser)) {
9452 lex_state_set(parser, PM_LEX_STATE_BEG);
9453 return PM_TOKEN_QUESTION_MARK;
9454 }
9455
9456 if (parser->current.end >= parser->end) {
9457 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9458 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9459 return PM_TOKEN_CHARACTER_LITERAL;
9460 }
9461
9462 if (pm_char_is_whitespace(*parser->current.end)) {
9463 lex_state_set(parser, PM_LEX_STATE_BEG);
9464 return PM_TOKEN_QUESTION_MARK;
9465 }
9466
9467 lex_state_set(parser, PM_LEX_STATE_BEG);
9468
9469 if (match(parser, '\\')) {
9470 lex_state_set(parser, PM_LEX_STATE_END);
9471
9472 pm_buffer_t buffer;
9473 pm_buffer_init(&buffer, 3);
9474
9475 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9476
9477 // Copy buffer data into the arena and free the heap buffer.
9478 void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
9479 pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
9480 pm_buffer_cleanup(&buffer);
9481
9482 return PM_TOKEN_CHARACTER_LITERAL;
9483 } else {
9484 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9485
9486 // Ternary operators can have a ? immediately followed by an identifier
9487 // which starts with an underscore. We check for this case here.
9488 if (
9489 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9490 (
9491 (parser->current.end + encoding_width >= parser->end) ||
9492 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9493 )
9494 ) {
9495 lex_state_set(parser, PM_LEX_STATE_END);
9496 parser->current.end += encoding_width;
9497 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9498 return PM_TOKEN_CHARACTER_LITERAL;
9499 }
9500 }
9501
9502 return PM_TOKEN_QUESTION_MARK;
9503}
9504
9509static pm_token_type_t
9510lex_at_variable(pm_parser_t *parser) {
9511 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9512 const uint8_t *end = parser->end;
9513
9514 size_t width;
9515 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9516 parser->current.end += width;
9517
9518 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9519 parser->current.end += width;
9520 }
9521 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9522 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9523 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9524 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9525 }
9526
9527 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9528 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9529 } else {
9530 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9531 pm_parser_err_token(parser, &parser->current, diag_id);
9532 }
9533
9534 // If we're lexing an embedded variable, then we need to pop back into the
9535 // parent lex context.
9536 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9537 lex_mode_pop(parser);
9538 }
9539
9540 return type;
9541}
9542
9546static PRISM_INLINE void
9547parser_lex_callback(pm_parser_t *parser) {
9548 if (parser->lex_callback.callback) {
9549 parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
9550 }
9551}
9552
9557parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9558 pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
9559
9560 *comment = (pm_comment_t) {
9561 .type = type,
9562 .location = TOK2LOC(parser, &parser->current)
9563 };
9564
9565 return comment;
9566}
9567
9573static pm_token_type_t
9574lex_embdoc(pm_parser_t *parser) {
9575 // First, lex out the EMBDOC_BEGIN token.
9576 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9577
9578 if (newline == NULL) {
9579 parser->current.end = parser->end;
9580 } else {
9581 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9582 parser->current.end = newline + 1;
9583 }
9584
9585 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9586 parser_lex_callback(parser);
9587
9588 // Now, create a comment that is going to be attached to the parser.
9589 const uint8_t *comment_start = parser->current.start;
9590 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9591
9592 // Now, loop until we find the end of the embedded documentation or the end
9593 // of the file.
9594 while (parser->current.end + 4 <= parser->end) {
9595 parser->current.start = parser->current.end;
9596
9597 // If we've hit the end of the embedded documentation then we'll return
9598 // that token here.
9599 if (
9600 (memcmp(parser->current.end, "=end", 4) == 0) &&
9601 (
9602 (parser->current.end + 4 == parser->end) || // end of file
9603 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9604 (parser->current.end[4] == '\0') || // NUL or end of script
9605 (parser->current.end[4] == '\004') || // ^D
9606 (parser->current.end[4] == '\032') // ^Z
9607 )
9608 ) {
9609 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9610
9611 if (newline == NULL) {
9612 parser->current.end = parser->end;
9613 } else {
9614 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9615 parser->current.end = newline + 1;
9616 }
9617
9618 parser->current.type = PM_TOKEN_EMBDOC_END;
9619 parser_lex_callback(parser);
9620
9621 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9622 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9623
9624 return PM_TOKEN_EMBDOC_END;
9625 }
9626
9627 // Otherwise, we'll parse until the end of the line and return a line of
9628 // embedded documentation.
9629 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9630
9631 if (newline == NULL) {
9632 parser->current.end = parser->end;
9633 } else {
9634 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9635 parser->current.end = newline + 1;
9636 }
9637
9638 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9639 parser_lex_callback(parser);
9640 }
9641
9642 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9643
9644 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9645 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9646
9647 return PM_TOKEN_EOF;
9648}
9649
9655static PRISM_INLINE void
9656parser_lex_ignored_newline(pm_parser_t *parser) {
9657 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9658 parser_lex_callback(parser);
9659}
9660
9670static PRISM_INLINE void
9671parser_flush_heredoc_end(pm_parser_t *parser) {
9672 assert(parser->heredoc_end <= parser->end);
9673 parser->next_start = parser->heredoc_end;
9674 parser->heredoc_end = NULL;
9675}
9676
9680static bool
9681parser_end_of_line_p(const pm_parser_t *parser) {
9682 const uint8_t *cursor = parser->current.end;
9683
9684 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9685 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9686 }
9687
9688 return true;
9689}
9690
9709typedef struct {
9715
9720 const uint8_t *cursor;
9722
9742
9746static PRISM_INLINE void
9747pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9748 pm_buffer_append_byte(&token_buffer->buffer, byte);
9749}
9750
9751static PRISM_INLINE void
9752pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9753 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9754}
9755
9759static PRISM_INLINE size_t
9760parser_char_width(const pm_parser_t *parser) {
9761 size_t width;
9762 if (parser->encoding_changed) {
9763 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9764 } else {
9765 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9766 }
9767
9768 // TODO: If the character is invalid in the given encoding, then we'll just
9769 // push one byte into the buffer. This should actually be an error.
9770 return (width == 0 ? 1 : width);
9771}
9772
9776static void
9777pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9778 size_t width = parser_char_width(parser);
9779 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9780 parser->current.end += width;
9781}
9782
9783static void
9784pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9785 size_t width = parser_char_width(parser);
9786 const uint8_t *start = parser->current.end;
9787 pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
9788 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
9789 parser->current.end += width;
9790}
9791
9798static PRISM_INLINE void
9799pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9800 // Copy buffer data into the arena and free the heap buffer.
9801 size_t len = pm_buffer_length(&token_buffer->buffer);
9802 void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
9803 pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
9804 pm_buffer_cleanup(&token_buffer->buffer);
9805}
9806
9807static PRISM_INLINE void
9808pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9809 pm_token_buffer_copy(parser, &token_buffer->base);
9810 pm_buffer_cleanup(&token_buffer->regexp_buffer);
9811}
9812
9822static void
9823pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9824 if (token_buffer->cursor == NULL) {
9825 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9826 } else {
9827 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9828 pm_token_buffer_copy(parser, token_buffer);
9829 }
9830}
9831
9832static void
9833pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9834 if (token_buffer->base.cursor == NULL) {
9835 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9836 } else {
9837 const uint8_t *cursor = token_buffer->base.cursor;
9838 size_t length = (size_t) (parser->current.end - cursor);
9839 pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
9840 pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
9841 pm_regexp_token_buffer_copy(parser, token_buffer);
9842 }
9843}
9844
9845#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9846
9855static void
9856pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9857 const uint8_t *start;
9858 if (token_buffer->cursor == NULL) {
9859 pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9860 start = parser->current.start;
9861 } else {
9862 start = token_buffer->cursor;
9863 }
9864
9865 const uint8_t *end = parser->current.end - 1;
9866 assert(end >= start);
9867 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9868
9869 token_buffer->cursor = end;
9870}
9871
9872static void
9873pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9874 const uint8_t *start;
9875 if (token_buffer->base.cursor == NULL) {
9876 pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9877 pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9878 start = parser->current.start;
9879 } else {
9880 start = token_buffer->base.cursor;
9881 }
9882
9883 const uint8_t *end = parser->current.end - 1;
9884 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9885 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9886
9887 token_buffer->base.cursor = end;
9888}
9889
9890#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9891
9896static PRISM_INLINE size_t
9897pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9898 size_t whitespace = 0;
9899
9900 switch (indent) {
9901 case PM_HEREDOC_INDENT_NONE:
9902 // Do nothing, we can't match a terminator with
9903 // indentation and there's no need to calculate common
9904 // whitespace.
9905 break;
9906 case PM_HEREDOC_INDENT_DASH:
9907 // Skip past inline whitespace.
9908 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9909 break;
9910 case PM_HEREDOC_INDENT_TILDE:
9911 // Skip past inline whitespace and calculate common
9912 // whitespace.
9913 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9914 if (**cursor == '\t') {
9915 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9916 } else {
9917 whitespace++;
9918 }
9919 (*cursor)++;
9920 }
9921
9922 break;
9923 }
9924
9925 return whitespace;
9926}
9927
9932static uint8_t
9933pm_lex_percent_delimiter(pm_parser_t *parser) {
9934 size_t eol_length = match_eol(parser);
9935
9936 if (eol_length) {
9937 if (parser->heredoc_end) {
9938 // If we have already lexed a heredoc, then the newline has already
9939 // been added to the list. In this case we want to just flush the
9940 // heredoc end.
9941 parser_flush_heredoc_end(parser);
9942 } else {
9943 // Otherwise, we'll add the newline to the list of newlines.
9944 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9945 }
9946
9947 uint8_t delimiter = *parser->current.end;
9948
9949 // If our delimiter is \r\n, we want to treat it as if it's \n.
9950 // For example, %\r\nfoo\r\n should be "foo"
9951 if (eol_length == 2) {
9952 delimiter = *(parser->current.end + 1);
9953 }
9954
9955 parser->current.end += eol_length;
9956 return delimiter;
9957 }
9958
9959 return *parser->current.end++;
9960}
9961
9966#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9967
9974static void
9975parser_lex(pm_parser_t *parser) {
9976 assert(parser->current.end <= parser->end);
9977 parser->previous = parser->current;
9978
9979 // This value mirrors cmd_state from CRuby.
9980 bool previous_command_start = parser->command_start;
9981 parser->command_start = false;
9982
9983 // This is used to communicate to the newline lexing function that we've
9984 // already seen a comment.
9985 bool lexed_comment = false;
9986
9987 // Here we cache the current value of the semantic token seen flag. This is
9988 // used to reset it in case we find a token that shouldn't flip this flag.
9989 unsigned int semantic_token_seen = parser->semantic_token_seen;
9990 parser->semantic_token_seen = true;
9991
9992 // We'll jump to this label when we are about to encounter an EOF.
9993 // If we still have lex_modes on the stack, we pop them so that cleanup
9994 // can happen. For example, we should still continue parsing after a heredoc
9995 // identifier, even if the heredoc body was syntax invalid.
9996 switch_lex_modes:
9997
9998 switch (parser->lex_modes.current->mode) {
9999 case PM_LEX_DEFAULT:
10000 case PM_LEX_EMBEXPR:
10001 case PM_LEX_EMBVAR:
10002
10003 // We have a specific named label here because we are going to jump back to
10004 // this location in the event that we have lexed a token that should not be
10005 // returned to the parser. This includes comments, ignored newlines, and
10006 // invalid tokens of some form.
10007 lex_next_token: {
10008 // If we have the special next_start pointer set, then we're going to jump
10009 // to that location and start lexing from there.
10010 if (parser->next_start != NULL) {
10011 parser->current.end = parser->next_start;
10012 parser->next_start = NULL;
10013 }
10014
10015 // This value mirrors space_seen from CRuby. It tracks whether or not
10016 // space has been eaten before the start of the next token.
10017 bool space_seen = false;
10018
10019 // First, we're going to skip past any whitespace at the front of the next
10020 // token. Skip runs of inline whitespace in bulk to avoid per-character
10021 // stores back to parser->current.end.
10022 bool chomping = true;
10023 while (parser->current.end < parser->end && chomping) {
10024 {
10025 static const uint8_t inline_whitespace[256] = {
10026 [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
10027 };
10028 const uint8_t *scan = parser->current.end;
10029 while (scan < parser->end && inline_whitespace[*scan]) scan++;
10030 if (scan > parser->current.end) {
10031 parser->current.end = scan;
10032 space_seen = true;
10033 continue;
10034 }
10035 }
10036
10037 switch (*parser->current.end) {
10038 case '\r':
10039 if (match_eol_offset(parser, 1)) {
10040 chomping = false;
10041 } else {
10042 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10043 parser->current.end++;
10044 space_seen = true;
10045 }
10046 break;
10047 case '\\': {
10048 size_t eol_length = match_eol_offset(parser, 1);
10049 if (eol_length) {
10050 if (parser->heredoc_end) {
10051 parser->current.end = parser->heredoc_end;
10052 parser->heredoc_end = NULL;
10053 } else {
10054 parser->current.end += eol_length + 1;
10055 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10056 space_seen = true;
10057 }
10058 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10059 parser->current.end += 2;
10060 } else {
10061 chomping = false;
10062 }
10063
10064 break;
10065 }
10066 default:
10067 chomping = false;
10068 break;
10069 }
10070 }
10071
10072 // Next, we'll set to start of this token to be the current end.
10073 parser->current.start = parser->current.end;
10074
10075 // We'll check if we're at the end of the file. If we are, then we
10076 // need to return the EOF token.
10077 if (parser->current.end >= parser->end) {
10078 // We may be missing closing tokens. We should pop modes one by one
10079 // to do the appropriate cleanup like moving next_start for heredocs.
10080 // Only when no mode is remaining will we actually emit the EOF token.
10081 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
10082 lex_mode_pop(parser);
10083 goto switch_lex_modes;
10084 }
10085
10086 // If we hit EOF, but the EOF came immediately after a newline,
10087 // set the start of the token to the newline. This way any EOF
10088 // errors will be reported as happening on that line rather than
10089 // a line after. For example "foo(\n" should report an error
10090 // on line 1 even though EOF technically occurs on line 2.
10091 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10092 parser->current.start -= 1;
10093 }
10094 LEX(PM_TOKEN_EOF);
10095 }
10096
10097 // Finally, we'll check the current character to determine the next
10098 // token.
10099 switch (*parser->current.end++) {
10100 case '\0': // NUL or end of script
10101 case '\004': // ^D
10102 case '\032': // ^Z
10103 parser->current.end--;
10104 LEX(PM_TOKEN_EOF);
10105
10106 case '#': { // comments
10107 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10108 parser->current.end = ending == NULL ? parser->end : ending;
10109
10110 // If we found a comment while lexing, then we're going to
10111 // add it to the list of comments in the file and keep
10112 // lexing.
10113 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10114 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10115
10116 if (ending) parser->current.end++;
10117 parser->current.type = PM_TOKEN_COMMENT;
10118 parser_lex_callback(parser);
10119
10120 // Here, parse the comment to see if it's a magic comment
10121 // and potentially change state on the parser.
10122 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10123 ptrdiff_t length = parser->current.end - parser->current.start;
10124
10125 // If we didn't find a magic comment within the first
10126 // pass and we're at the start of the file, then we need
10127 // to do another pass to potentially find other patterns
10128 // for encoding comments.
10129 if (length >= 10 && !parser->encoding_locked) {
10130 parser_lex_magic_comment_encoding(parser);
10131 }
10132 }
10133
10134 lexed_comment = true;
10135 }
10137 case '\r':
10138 case '\n': {
10139 parser->semantic_token_seen = semantic_token_seen & 0x1;
10140 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10141
10142 if (eol_length) {
10143 // The only way you can have carriage returns in this
10144 // particular loop is if you have a carriage return
10145 // followed by a newline. In that case we'll just skip
10146 // over the carriage return and continue lexing, in
10147 // order to make it so that the newline token
10148 // encapsulates both the carriage return and the
10149 // newline. Note that we need to check that we haven't
10150 // already lexed a comment here because that falls
10151 // through into here as well.
10152 if (!lexed_comment) {
10153 parser->current.end += eol_length - 1; // skip CR
10154 }
10155
10156 if (parser->heredoc_end == NULL) {
10157 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10158 }
10159 }
10160
10161 if (parser->heredoc_end) {
10162 parser_flush_heredoc_end(parser);
10163 }
10164
10165 // If this is an ignored newline, then we can continue lexing after
10166 // calling the callback with the ignored newline token.
10167 switch (lex_state_ignored_p(parser)) {
10168 case PM_IGNORED_NEWLINE_NONE:
10169 break;
10170 case PM_IGNORED_NEWLINE_PATTERN:
10171 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10172 if (!lexed_comment) parser_lex_ignored_newline(parser);
10173 lex_state_set(parser, PM_LEX_STATE_BEG);
10174 parser->command_start = true;
10175 parser->current.type = PM_TOKEN_NEWLINE;
10176 return;
10177 }
10179 case PM_IGNORED_NEWLINE_ALL:
10180 if (!lexed_comment) parser_lex_ignored_newline(parser);
10181 lexed_comment = false;
10182 goto lex_next_token;
10183 }
10184
10185 // Here we need to look ahead and see if there is a call operator
10186 // (either . or &.) that starts the next line. If there is, then this
10187 // is going to become an ignored newline and we're going to instead
10188 // return the call operator.
10189 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10190 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10191
10192 if (next_content < parser->end) {
10193 // If we hit a comment after a newline, then we're going to check
10194 // if it's ignored or if it's followed by a method call ('.').
10195 // If it is, then we're going to call the
10196 // callback with an ignored newline and then continue lexing.
10197 // Otherwise we'll return a regular newline.
10198 if (next_content[0] == '#') {
10199 // Here we look for a "." or "&." following a "\n".
10200 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10201
10202 while (following && (following + 1 < parser->end)) {
10203 following++;
10204 following += pm_strspn_inline_whitespace(following, parser->end - following);
10205
10206 // If this is not followed by a comment, then we can break out
10207 // of this loop.
10208 if (peek_at(parser, following) != '#') break;
10209
10210 // If there is a comment, then we need to find the end of the
10211 // comment and continue searching from there.
10212 following = next_newline(following, parser->end - following);
10213 }
10214
10215 // If the lex state was ignored, we will lex the
10216 // ignored newline.
10217 if (lex_state_ignored_p(parser)) {
10218 if (!lexed_comment) parser_lex_ignored_newline(parser);
10219 lexed_comment = false;
10220 goto lex_next_token;
10221 }
10222
10223 // If we hit a '.' or a '&.' we will lex the ignored
10224 // newline.
10225 if (following && (
10226 (peek_at(parser, following) == '.') ||
10227 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10228 )) {
10229 if (!lexed_comment) parser_lex_ignored_newline(parser);
10230 lexed_comment = false;
10231 goto lex_next_token;
10232 }
10233
10234
10235 // If we are parsing as CRuby 4.0 or later and we
10236 // hit a '&&' or a '||' then we will lex the ignored
10237 // newline.
10238 if (
10239 (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
10240 following && (
10241 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10242 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10243 (
10244 peek_at(parser, following) == 'a' &&
10245 peek_at(parser, following + 1) == 'n' &&
10246 peek_at(parser, following + 2) == 'd' &&
10247 peek_at(parser, next_content + 3) != '!' &&
10248 peek_at(parser, next_content + 3) != '?' &&
10249 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
10250 ) ||
10251 (
10252 peek_at(parser, following) == 'o' &&
10253 peek_at(parser, following + 1) == 'r' &&
10254 peek_at(parser, next_content + 2) != '!' &&
10255 peek_at(parser, next_content + 2) != '?' &&
10256 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
10257 )
10258 )
10259 ) {
10260 if (!lexed_comment) parser_lex_ignored_newline(parser);
10261 lexed_comment = false;
10262 goto lex_next_token;
10263 }
10264 }
10265
10266 // If we hit a . after a newline, then we're in a call chain and
10267 // we need to return the call operator.
10268 if (next_content[0] == '.') {
10269 // To match ripper, we need to emit an ignored newline even though
10270 // it's a real newline in the case that we have a beginless range
10271 // on a subsequent line.
10272 if (peek_at(parser, next_content + 1) == '.') {
10273 if (!lexed_comment) parser_lex_ignored_newline(parser);
10274 lex_state_set(parser, PM_LEX_STATE_BEG);
10275 parser->command_start = true;
10276 parser->current.type = PM_TOKEN_NEWLINE;
10277 return;
10278 }
10279
10280 if (!lexed_comment) parser_lex_ignored_newline(parser);
10281 lex_state_set(parser, PM_LEX_STATE_DOT);
10282 parser->current.start = next_content;
10283 parser->current.end = next_content + 1;
10284 parser->next_start = NULL;
10285 LEX(PM_TOKEN_DOT);
10286 }
10287
10288 // If we hit a &. after a newline, then we're in a call chain and
10289 // we need to return the call operator.
10290 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10291 if (!lexed_comment) parser_lex_ignored_newline(parser);
10292 lex_state_set(parser, PM_LEX_STATE_DOT);
10293 parser->current.start = next_content;
10294 parser->current.end = next_content + 2;
10295 parser->next_start = NULL;
10296 LEX(PM_TOKEN_AMPERSAND_DOT);
10297 }
10298
10299 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10300 // If we hit an && then we are in a logical chain
10301 // and we need to return the logical operator.
10302 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10303 if (!lexed_comment) parser_lex_ignored_newline(parser);
10304 lex_state_set(parser, PM_LEX_STATE_BEG);
10305 parser->current.start = next_content;
10306 parser->current.end = next_content + 2;
10307 parser->next_start = NULL;
10308 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10309 }
10310
10311 // If we hit a || then we are in a logical chain and
10312 // we need to return the logical operator.
10313 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10314 if (!lexed_comment) parser_lex_ignored_newline(parser);
10315 lex_state_set(parser, PM_LEX_STATE_BEG);
10316 parser->current.start = next_content;
10317 parser->current.end = next_content + 2;
10318 parser->next_start = NULL;
10319 LEX(PM_TOKEN_PIPE_PIPE);
10320 }
10321
10322 // If we hit an 'and' then we are in a logical chain
10323 // and we need to return the logical operator.
10324 if (
10325 peek_at(parser, next_content) == 'a' &&
10326 peek_at(parser, next_content + 1) == 'n' &&
10327 peek_at(parser, next_content + 2) == 'd' &&
10328 peek_at(parser, next_content + 3) != '!' &&
10329 peek_at(parser, next_content + 3) != '?' &&
10330 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10331 ) {
10332 if (!lexed_comment) parser_lex_ignored_newline(parser);
10333 lex_state_set(parser, PM_LEX_STATE_BEG);
10334 parser->current.start = next_content;
10335 parser->current.end = next_content + 3;
10336 parser->next_start = NULL;
10337 parser->command_start = true;
10338 LEX(PM_TOKEN_KEYWORD_AND);
10339 }
10340
10341 // If we hit a 'or' then we are in a logical chain
10342 // and we need to return the logical operator.
10343 if (
10344 peek_at(parser, next_content) == 'o' &&
10345 peek_at(parser, next_content + 1) == 'r' &&
10346 peek_at(parser, next_content + 2) != '!' &&
10347 peek_at(parser, next_content + 2) != '?' &&
10348 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10349 ) {
10350 if (!lexed_comment) parser_lex_ignored_newline(parser);
10351 lex_state_set(parser, PM_LEX_STATE_BEG);
10352 parser->current.start = next_content;
10353 parser->current.end = next_content + 2;
10354 parser->next_start = NULL;
10355 parser->command_start = true;
10356 LEX(PM_TOKEN_KEYWORD_OR);
10357 }
10358 }
10359 }
10360
10361 // At this point we know this is a regular newline, and we can set the
10362 // necessary state and return the token.
10363 lex_state_set(parser, PM_LEX_STATE_BEG);
10364 parser->command_start = true;
10365 parser->current.type = PM_TOKEN_NEWLINE;
10366 if (!lexed_comment) parser_lex_callback(parser);
10367 return;
10368 }
10369
10370 // ,
10371 case ',':
10372 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10373 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
10374 }
10375
10376 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10377 LEX(PM_TOKEN_COMMA);
10378
10379 // (
10380 case '(': {
10381 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10382
10383 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10384 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10385 }
10386
10387 parser->enclosure_nesting++;
10388 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10389 pm_do_loop_stack_push(parser, false);
10390 LEX(type);
10391 }
10392
10393 // )
10394 case ')':
10395 parser->enclosure_nesting--;
10396 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10397 pm_do_loop_stack_pop(parser);
10398 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10399
10400 // ;
10401 case ';':
10402 lex_state_set(parser, PM_LEX_STATE_BEG);
10403 parser->command_start = true;
10404 LEX(PM_TOKEN_SEMICOLON);
10405
10406 // [ [] []=
10407 case '[':
10408 parser->enclosure_nesting++;
10409 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10410
10411 if (lex_state_operator_p(parser)) {
10412 if (match(parser, ']')) {
10413 parser->enclosure_nesting--;
10414 lex_state_set(parser, PM_LEX_STATE_ARG);
10415 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10416 }
10417
10418 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10419 LEX(type);
10420 }
10421
10422 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10423 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10424 }
10425
10426 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10427 pm_do_loop_stack_push(parser, false);
10428 LEX(type);
10429
10430 // ]
10431 case ']':
10432 parser->enclosure_nesting--;
10433 lex_state_set(parser, PM_LEX_STATE_END);
10434 pm_do_loop_stack_pop(parser);
10435 LEX(PM_TOKEN_BRACKET_RIGHT);
10436
10437 // {
10438 case '{': {
10439 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10440
10441 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10442 // This { begins a lambda
10443 parser->command_start = true;
10444 lex_state_set(parser, PM_LEX_STATE_BEG);
10445 type = PM_TOKEN_LAMBDA_BEGIN;
10446 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10447 // This { begins a hash literal
10448 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10449 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10450 // This { begins a block
10451 parser->command_start = true;
10452 lex_state_set(parser, PM_LEX_STATE_BEG);
10453 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10454 // This { begins a block on a command
10455 parser->command_start = true;
10456 lex_state_set(parser, PM_LEX_STATE_BEG);
10457 } else {
10458 // This { begins a hash literal
10459 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10460 }
10461
10462 parser->enclosure_nesting++;
10463 parser->brace_nesting++;
10464 pm_do_loop_stack_push(parser, false);
10465
10466 LEX(type);
10467 }
10468
10469 // }
10470 case '}':
10471 parser->enclosure_nesting--;
10472 pm_do_loop_stack_pop(parser);
10473
10474 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10475 lex_mode_pop(parser);
10476 LEX(PM_TOKEN_EMBEXPR_END);
10477 }
10478
10479 parser->brace_nesting--;
10480 lex_state_set(parser, PM_LEX_STATE_END);
10481 LEX(PM_TOKEN_BRACE_RIGHT);
10482
10483 // * ** **= *=
10484 case '*': {
10485 if (match(parser, '*')) {
10486 if (match(parser, '=')) {
10487 lex_state_set(parser, PM_LEX_STATE_BEG);
10488 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10489 }
10490
10491 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10492
10493 if (lex_state_spcarg_p(parser, space_seen)) {
10494 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10495 type = PM_TOKEN_USTAR_STAR;
10496 } else if (lex_state_beg_p(parser)) {
10497 type = PM_TOKEN_USTAR_STAR;
10498 } else if (ambiguous_operator_p(parser, space_seen)) {
10499 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10500 }
10501
10502 if (lex_state_operator_p(parser)) {
10503 lex_state_set(parser, PM_LEX_STATE_ARG);
10504 } else {
10505 lex_state_set(parser, PM_LEX_STATE_BEG);
10506 }
10507
10508 LEX(type);
10509 }
10510
10511 if (match(parser, '=')) {
10512 lex_state_set(parser, PM_LEX_STATE_BEG);
10513 LEX(PM_TOKEN_STAR_EQUAL);
10514 }
10515
10516 pm_token_type_t type = PM_TOKEN_STAR;
10517
10518 if (lex_state_spcarg_p(parser, space_seen)) {
10519 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10520 type = PM_TOKEN_USTAR;
10521 } else if (lex_state_beg_p(parser)) {
10522 type = PM_TOKEN_USTAR;
10523 } else if (ambiguous_operator_p(parser, space_seen)) {
10524 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10525 }
10526
10527 if (lex_state_operator_p(parser)) {
10528 lex_state_set(parser, PM_LEX_STATE_ARG);
10529 } else {
10530 lex_state_set(parser, PM_LEX_STATE_BEG);
10531 }
10532
10533 LEX(type);
10534 }
10535
10536 // ! != !~ !@
10537 case '!':
10538 if (lex_state_operator_p(parser)) {
10539 lex_state_set(parser, PM_LEX_STATE_ARG);
10540 if (match(parser, '@')) {
10541 LEX(PM_TOKEN_BANG);
10542 }
10543 } else {
10544 lex_state_set(parser, PM_LEX_STATE_BEG);
10545 }
10546
10547 if (match(parser, '=')) {
10548 LEX(PM_TOKEN_BANG_EQUAL);
10549 }
10550
10551 if (match(parser, '~')) {
10552 LEX(PM_TOKEN_BANG_TILDE);
10553 }
10554
10555 LEX(PM_TOKEN_BANG);
10556
10557 // = => =~ == === =begin
10558 case '=':
10559 if (
10560 current_token_starts_line(parser) &&
10561 (parser->current.end + 5 <= parser->end) &&
10562 memcmp(parser->current.end, "begin", 5) == 0 &&
10563 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10564 ) {
10565 pm_token_type_t type = lex_embdoc(parser);
10566 if (type == PM_TOKEN_EOF) {
10567 LEX(type);
10568 }
10569
10570 goto lex_next_token;
10571 }
10572
10573 if (lex_state_operator_p(parser)) {
10574 lex_state_set(parser, PM_LEX_STATE_ARG);
10575 } else {
10576 lex_state_set(parser, PM_LEX_STATE_BEG);
10577 }
10578
10579 if (match(parser, '>')) {
10580 LEX(PM_TOKEN_EQUAL_GREATER);
10581 }
10582
10583 if (match(parser, '~')) {
10584 LEX(PM_TOKEN_EQUAL_TILDE);
10585 }
10586
10587 if (match(parser, '=')) {
10588 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10589 }
10590
10591 LEX(PM_TOKEN_EQUAL);
10592
10593 // < << <<= <= <=>
10594 case '<':
10595 if (match(parser, '<')) {
10596 if (
10597 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10598 !lex_state_end_p(parser) &&
10599 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10600 ) {
10601 const uint8_t *end = parser->current.end;
10602
10603 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10604 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10605
10606 if (match(parser, '-')) {
10607 indent = PM_HEREDOC_INDENT_DASH;
10608 }
10609 else if (match(parser, '~')) {
10610 indent = PM_HEREDOC_INDENT_TILDE;
10611 }
10612
10613 if (match(parser, '`')) {
10614 quote = PM_HEREDOC_QUOTE_BACKTICK;
10615 }
10616 else if (match(parser, '"')) {
10617 quote = PM_HEREDOC_QUOTE_DOUBLE;
10618 }
10619 else if (match(parser, '\'')) {
10620 quote = PM_HEREDOC_QUOTE_SINGLE;
10621 }
10622
10623 const uint8_t *ident_start = parser->current.end;
10624 size_t width = 0;
10625
10626 if (parser->current.end >= parser->end) {
10627 parser->current.end = end;
10628 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10629 parser->current.end = end;
10630 } else {
10631 if (quote == PM_HEREDOC_QUOTE_NONE) {
10632 parser->current.end += width;
10633
10634 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10635 parser->current.end += width;
10636 }
10637 } else {
10638 // If we have quotes, then we're going to go until we find the
10639 // end quote.
10640 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10641 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10642 parser->current.end++;
10643 }
10644 }
10645
10646 size_t ident_length = (size_t) (parser->current.end - ident_start);
10647 bool ident_error = false;
10648
10649 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10650 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10651 ident_error = true;
10652 }
10653
10654 parser->explicit_encoding = NULL;
10655 lex_mode_push(parser, (pm_lex_mode_t) {
10656 .mode = PM_LEX_HEREDOC,
10657 .as.heredoc = {
10658 .base = {
10659 .ident_start = ident_start,
10660 .ident_length = ident_length,
10661 .quote = quote,
10662 .indent = indent
10663 },
10664 .next_start = parser->current.end,
10665 .common_whitespace = NULL,
10666 .line_continuation = false
10667 }
10668 });
10669
10670 if (parser->heredoc_end == NULL) {
10671 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10672
10673 if (body_start == NULL) {
10674 // If there is no newline after the heredoc identifier, then
10675 // this is not a valid heredoc declaration. In this case we
10676 // will add an error, but we will still return a heredoc
10677 // start.
10678 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10679 body_start = parser->end;
10680 } else {
10681 // Otherwise, we want to indicate that the body of the
10682 // heredoc starts on the character after the next newline.
10683 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
10684 body_start++;
10685 }
10686
10687 parser->next_start = body_start;
10688 } else {
10689 parser->next_start = parser->heredoc_end;
10690 }
10691
10692 LEX(PM_TOKEN_HEREDOC_START);
10693 }
10694 }
10695
10696 if (match(parser, '=')) {
10697 lex_state_set(parser, PM_LEX_STATE_BEG);
10698 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10699 }
10700
10701 if (ambiguous_operator_p(parser, space_seen)) {
10702 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10703 }
10704
10705 if (lex_state_operator_p(parser)) {
10706 lex_state_set(parser, PM_LEX_STATE_ARG);
10707 } else {
10708 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10709 lex_state_set(parser, PM_LEX_STATE_BEG);
10710 }
10711
10712 LEX(PM_TOKEN_LESS_LESS);
10713 }
10714
10715 if (lex_state_operator_p(parser)) {
10716 lex_state_set(parser, PM_LEX_STATE_ARG);
10717 } else {
10718 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10719 lex_state_set(parser, PM_LEX_STATE_BEG);
10720 }
10721
10722 if (match(parser, '=')) {
10723 if (match(parser, '>')) {
10724 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10725 }
10726
10727 LEX(PM_TOKEN_LESS_EQUAL);
10728 }
10729
10730 LEX(PM_TOKEN_LESS);
10731
10732 // > >> >>= >=
10733 case '>':
10734 if (match(parser, '>')) {
10735 if (lex_state_operator_p(parser)) {
10736 lex_state_set(parser, PM_LEX_STATE_ARG);
10737 } else {
10738 lex_state_set(parser, PM_LEX_STATE_BEG);
10739 }
10740 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10741 }
10742
10743 if (lex_state_operator_p(parser)) {
10744 lex_state_set(parser, PM_LEX_STATE_ARG);
10745 } else {
10746 lex_state_set(parser, PM_LEX_STATE_BEG);
10747 }
10748
10749 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10750
10751 // double-quoted string literal
10752 case '"': {
10753 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10754 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10755 LEX(PM_TOKEN_STRING_BEGIN);
10756 }
10757
10758 // xstring literal
10759 case '`': {
10760 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10761 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10762 LEX(PM_TOKEN_BACKTICK);
10763 }
10764
10765 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10766 if (previous_command_start) {
10767 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10768 } else {
10769 lex_state_set(parser, PM_LEX_STATE_ARG);
10770 }
10771
10772 LEX(PM_TOKEN_BACKTICK);
10773 }
10774
10775 lex_mode_push_string(parser, true, false, '\0', '`');
10776 LEX(PM_TOKEN_BACKTICK);
10777 }
10778
10779 // single-quoted string literal
10780 case '\'': {
10781 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10782 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10783 LEX(PM_TOKEN_STRING_BEGIN);
10784 }
10785
10786 // ? character literal
10787 case '?':
10788 LEX(lex_question_mark(parser));
10789
10790 // & && &&= &=
10791 case '&': {
10792 if (match(parser, '&')) {
10793 lex_state_set(parser, PM_LEX_STATE_BEG);
10794
10795 if (match(parser, '=')) {
10796 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10797 }
10798
10799 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10800 }
10801
10802 if (match(parser, '=')) {
10803 lex_state_set(parser, PM_LEX_STATE_BEG);
10804 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10805 }
10806
10807 if (match(parser, '.')) {
10808 lex_state_set(parser, PM_LEX_STATE_DOT);
10809 LEX(PM_TOKEN_AMPERSAND_DOT);
10810 }
10811
10812 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10813 if (lex_state_spcarg_p(parser, space_seen)) {
10814 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10815 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10816 } else {
10817 const uint8_t delim = peek_offset(parser, 1);
10818
10819 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10820 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10821 }
10822 }
10823
10824 type = PM_TOKEN_UAMPERSAND;
10825 } else if (lex_state_beg_p(parser)) {
10826 type = PM_TOKEN_UAMPERSAND;
10827 } else if (ambiguous_operator_p(parser, space_seen)) {
10828 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10829 }
10830
10831 if (lex_state_operator_p(parser)) {
10832 lex_state_set(parser, PM_LEX_STATE_ARG);
10833 } else {
10834 lex_state_set(parser, PM_LEX_STATE_BEG);
10835 }
10836
10837 LEX(type);
10838 }
10839
10840 // | || ||= |=
10841 case '|':
10842 if (match(parser, '|')) {
10843 if (match(parser, '=')) {
10844 lex_state_set(parser, PM_LEX_STATE_BEG);
10845 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10846 }
10847
10848 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10849 parser->current.end--;
10850 LEX(PM_TOKEN_PIPE);
10851 }
10852
10853 lex_state_set(parser, PM_LEX_STATE_BEG);
10854 LEX(PM_TOKEN_PIPE_PIPE);
10855 }
10856
10857 if (match(parser, '=')) {
10858 lex_state_set(parser, PM_LEX_STATE_BEG);
10859 LEX(PM_TOKEN_PIPE_EQUAL);
10860 }
10861
10862 if (lex_state_operator_p(parser)) {
10863 lex_state_set(parser, PM_LEX_STATE_ARG);
10864 } else {
10865 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10866 }
10867
10868 LEX(PM_TOKEN_PIPE);
10869
10870 // + += +@
10871 case '+': {
10872 if (lex_state_operator_p(parser)) {
10873 lex_state_set(parser, PM_LEX_STATE_ARG);
10874
10875 if (match(parser, '@')) {
10876 LEX(PM_TOKEN_UPLUS);
10877 }
10878
10879 LEX(PM_TOKEN_PLUS);
10880 }
10881
10882 if (match(parser, '=')) {
10883 lex_state_set(parser, PM_LEX_STATE_BEG);
10884 LEX(PM_TOKEN_PLUS_EQUAL);
10885 }
10886
10887 if (
10888 lex_state_beg_p(parser) ||
10889 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10890 ) {
10891 lex_state_set(parser, PM_LEX_STATE_BEG);
10892
10893 if (pm_char_is_decimal_digit(peek(parser))) {
10894 parser->current.end++;
10895 pm_token_type_t type = lex_numeric(parser);
10896 lex_state_set(parser, PM_LEX_STATE_END);
10897 LEX(type);
10898 }
10899
10900 LEX(PM_TOKEN_UPLUS);
10901 }
10902
10903 if (ambiguous_operator_p(parser, space_seen)) {
10904 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10905 }
10906
10907 lex_state_set(parser, PM_LEX_STATE_BEG);
10908 LEX(PM_TOKEN_PLUS);
10909 }
10910
10911 // - -= -@
10912 case '-': {
10913 if (lex_state_operator_p(parser)) {
10914 lex_state_set(parser, PM_LEX_STATE_ARG);
10915
10916 if (match(parser, '@')) {
10917 LEX(PM_TOKEN_UMINUS);
10918 }
10919
10920 LEX(PM_TOKEN_MINUS);
10921 }
10922
10923 if (match(parser, '=')) {
10924 lex_state_set(parser, PM_LEX_STATE_BEG);
10925 LEX(PM_TOKEN_MINUS_EQUAL);
10926 }
10927
10928 if (match(parser, '>')) {
10929 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10930 LEX(PM_TOKEN_MINUS_GREATER);
10931 }
10932
10933 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10934 bool is_beg = lex_state_beg_p(parser);
10935 if (!is_beg && spcarg) {
10936 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10937 }
10938
10939 if (is_beg || spcarg) {
10940 lex_state_set(parser, PM_LEX_STATE_BEG);
10941 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10942 }
10943
10944 if (ambiguous_operator_p(parser, space_seen)) {
10945 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10946 }
10947
10948 lex_state_set(parser, PM_LEX_STATE_BEG);
10949 LEX(PM_TOKEN_MINUS);
10950 }
10951
10952 // . .. ...
10953 case '.': {
10954 bool beg_p = lex_state_beg_p(parser);
10955
10956 if (match(parser, '.')) {
10957 if (match(parser, '.')) {
10958 // If we're _not_ inside a range within default parameters
10959 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10960 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10961 lex_state_set(parser, PM_LEX_STATE_BEG);
10962 } else {
10963 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10964 }
10965 LEX(PM_TOKEN_UDOT_DOT_DOT);
10966 }
10967
10968 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10969 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10970 }
10971
10972 lex_state_set(parser, PM_LEX_STATE_BEG);
10973 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10974 }
10975
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10977 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10978 }
10979
10980 lex_state_set(parser, PM_LEX_STATE_DOT);
10981 LEX(PM_TOKEN_DOT);
10982 }
10983
10984 // integer
10985 case '0':
10986 case '1':
10987 case '2':
10988 case '3':
10989 case '4':
10990 case '5':
10991 case '6':
10992 case '7':
10993 case '8':
10994 case '9': {
10995 pm_token_type_t type = lex_numeric(parser);
10996 lex_state_set(parser, PM_LEX_STATE_END);
10997 LEX(type);
10998 }
10999
11000 // :: symbol
11001 case ':':
11002 if (match(parser, ':')) {
11003 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11004 lex_state_set(parser, PM_LEX_STATE_BEG);
11005 LEX(PM_TOKEN_UCOLON_COLON);
11006 }
11007
11008 lex_state_set(parser, PM_LEX_STATE_DOT);
11009 LEX(PM_TOKEN_COLON_COLON);
11010 }
11011
11012 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11013 lex_state_set(parser, PM_LEX_STATE_BEG);
11014 LEX(PM_TOKEN_COLON);
11015 }
11016
11017 if (peek(parser) == '"' || peek(parser) == '\'') {
11018 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11019 parser->current.end++;
11020 }
11021
11022 lex_state_set(parser, PM_LEX_STATE_FNAME);
11023 LEX(PM_TOKEN_SYMBOL_BEGIN);
11024
11025 // / /=
11026 case '/':
11027 if (lex_state_beg_p(parser)) {
11028 lex_mode_push_regexp(parser, '\0', '/');
11029 LEX(PM_TOKEN_REGEXP_BEGIN);
11030 }
11031
11032 if (match(parser, '=')) {
11033 lex_state_set(parser, PM_LEX_STATE_BEG);
11034 LEX(PM_TOKEN_SLASH_EQUAL);
11035 }
11036
11037 if (lex_state_spcarg_p(parser, space_seen)) {
11038 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11039 lex_mode_push_regexp(parser, '\0', '/');
11040 LEX(PM_TOKEN_REGEXP_BEGIN);
11041 }
11042
11043 if (ambiguous_operator_p(parser, space_seen)) {
11044 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11045 }
11046
11047 if (lex_state_operator_p(parser)) {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 } else {
11050 lex_state_set(parser, PM_LEX_STATE_BEG);
11051 }
11052
11053 LEX(PM_TOKEN_SLASH);
11054
11055 // ^ ^=
11056 case '^':
11057 if (lex_state_operator_p(parser)) {
11058 lex_state_set(parser, PM_LEX_STATE_ARG);
11059 } else {
11060 lex_state_set(parser, PM_LEX_STATE_BEG);
11061 }
11062 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11063
11064 // ~ ~@
11065 case '~':
11066 if (lex_state_operator_p(parser)) {
11067 (void) match(parser, '@');
11068 lex_state_set(parser, PM_LEX_STATE_ARG);
11069 } else {
11070 lex_state_set(parser, PM_LEX_STATE_BEG);
11071 }
11072
11073 LEX(PM_TOKEN_TILDE);
11074
11075 // % %= %i %I %q %Q %w %W
11076 case '%': {
11077 // If there is no subsequent character then we have an
11078 // invalid token. We're going to say it's the percent
11079 // operator because we don't want to move into the string
11080 // lex mode unnecessarily.
11081 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11082 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11083 LEX(PM_TOKEN_PERCENT);
11084 }
11085
11086 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11087 lex_state_set(parser, PM_LEX_STATE_BEG);
11088 LEX(PM_TOKEN_PERCENT_EQUAL);
11089 } else if (
11090 lex_state_beg_p(parser) ||
11091 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11092 lex_state_spcarg_p(parser, space_seen)
11093 ) {
11094 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11095 if (*parser->current.end >= 0x80) {
11096 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11097 }
11098
11099 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11100 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11101 LEX(PM_TOKEN_STRING_BEGIN);
11102 }
11103
11104 // Delimiters for %-literals cannot be alphanumeric. We
11105 // validate that here.
11106 uint8_t delimiter = peek_offset(parser, 1);
11107 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11108 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11109 goto lex_next_token;
11110 }
11111
11112 switch (peek(parser)) {
11113 case 'i': {
11114 parser->current.end++;
11115
11116 if (parser->current.end < parser->end) {
11117 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11118 } else {
11119 lex_mode_push_list_eof(parser);
11120 }
11121
11122 LEX(PM_TOKEN_PERCENT_LOWER_I);
11123 }
11124 case 'I': {
11125 parser->current.end++;
11126
11127 if (parser->current.end < parser->end) {
11128 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11129 } else {
11130 lex_mode_push_list_eof(parser);
11131 }
11132
11133 LEX(PM_TOKEN_PERCENT_UPPER_I);
11134 }
11135 case 'r': {
11136 parser->current.end++;
11137
11138 if (parser->current.end < parser->end) {
11139 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11140 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11141 } else {
11142 lex_mode_push_regexp(parser, '\0', '\0');
11143 }
11144
11145 LEX(PM_TOKEN_REGEXP_BEGIN);
11146 }
11147 case 'q': {
11148 parser->current.end++;
11149
11150 if (parser->current.end < parser->end) {
11151 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11152 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11153 } else {
11154 lex_mode_push_string_eof(parser);
11155 }
11156
11157 LEX(PM_TOKEN_STRING_BEGIN);
11158 }
11159 case 'Q': {
11160 parser->current.end++;
11161
11162 if (parser->current.end < parser->end) {
11163 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11164 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11165 } else {
11166 lex_mode_push_string_eof(parser);
11167 }
11168
11169 LEX(PM_TOKEN_STRING_BEGIN);
11170 }
11171 case 's': {
11172 parser->current.end++;
11173
11174 if (parser->current.end < parser->end) {
11175 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11176 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11177 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11178 } else {
11179 lex_mode_push_string_eof(parser);
11180 }
11181
11182 LEX(PM_TOKEN_SYMBOL_BEGIN);
11183 }
11184 case 'w': {
11185 parser->current.end++;
11186
11187 if (parser->current.end < parser->end) {
11188 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11189 } else {
11190 lex_mode_push_list_eof(parser);
11191 }
11192
11193 LEX(PM_TOKEN_PERCENT_LOWER_W);
11194 }
11195 case 'W': {
11196 parser->current.end++;
11197
11198 if (parser->current.end < parser->end) {
11199 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11200 } else {
11201 lex_mode_push_list_eof(parser);
11202 }
11203
11204 LEX(PM_TOKEN_PERCENT_UPPER_W);
11205 }
11206 case 'x': {
11207 parser->current.end++;
11208
11209 if (parser->current.end < parser->end) {
11210 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11211 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11212 } else {
11213 lex_mode_push_string_eof(parser);
11214 }
11215
11216 LEX(PM_TOKEN_PERCENT_LOWER_X);
11217 }
11218 default:
11219 // If we get to this point, then we have a % that is completely
11220 // unparsable. In this case we'll just drop it from the parser
11221 // and skip past it and hope that the next token is something
11222 // that we can parse.
11223 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11224 goto lex_next_token;
11225 }
11226 }
11227
11228 if (ambiguous_operator_p(parser, space_seen)) {
11229 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11230 }
11231
11232 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11233 LEX(PM_TOKEN_PERCENT);
11234 }
11235
11236 // global variable
11237 case '$': {
11238 pm_token_type_t type = lex_global_variable(parser);
11239
11240 // If we're lexing an embedded variable, then we need to pop back into
11241 // the parent lex context.
11242 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11243 lex_mode_pop(parser);
11244 }
11245
11246 lex_state_set(parser, PM_LEX_STATE_END);
11247 LEX(type);
11248 }
11249
11250 // instance variable, class variable
11251 case '@':
11252 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11253 LEX(lex_at_variable(parser));
11254
11255 default: {
11256 if (*parser->current.start != '_') {
11257 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11258
11259 // If this isn't the beginning of an identifier, then
11260 // it's an invalid token as we've exhausted all of the
11261 // other options. We'll skip past it and return the next
11262 // token after adding an appropriate error message.
11263 if (!width) {
11264 if (*parser->current.start >= 0x80) {
11265 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11266 } else if (*parser->current.start == '\\') {
11267 switch (peek_at(parser, parser->current.start + 1)) {
11268 case ' ':
11269 parser->current.end++;
11270 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11271 break;
11272 case '\f':
11273 parser->current.end++;
11274 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11275 break;
11276 case '\t':
11277 parser->current.end++;
11278 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11279 break;
11280 case '\v':
11281 parser->current.end++;
11282 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11283 break;
11284 case '\r':
11285 if (peek_at(parser, parser->current.start + 2) != '\n') {
11286 parser->current.end++;
11287 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11288 break;
11289 }
11291 default:
11292 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11293 break;
11294 }
11295 } else if (char_is_ascii_printable(*parser->current.start)) {
11296 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11297 } else {
11298 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11299 }
11300
11301 goto lex_next_token;
11302 }
11303
11304 parser->current.end = parser->current.start + width;
11305 }
11306
11307 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11308
11309 // If we've hit a __END__ and it was at the start of the
11310 // line or the start of the file and it is followed by
11311 // either a \n or a \r\n, then this is the last token of the
11312 // file.
11313 if (
11314 ((parser->current.end - parser->current.start) == 7) &&
11315 current_token_starts_line(parser) &&
11316 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11317 (parser->current.end == parser->end || match_eol(parser))
11318 ) {
11319 // Since we know we're about to add an __END__ comment,
11320 // we know we need to add all of the newlines to get the
11321 // correct column information for it.
11322 const uint8_t *cursor = parser->current.end;
11323 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11324 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
11325 }
11326
11327 parser->current.end = parser->end;
11328 parser->current.type = PM_TOKEN___END__;
11329 parser_lex_callback(parser);
11330
11331 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
11332 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
11333
11334 LEX(PM_TOKEN_EOF);
11335 }
11336
11337 pm_lex_state_t last_state = parser->lex_state;
11338
11339 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11340 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11341 if (previous_command_start) {
11342 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11343 } else {
11344 lex_state_set(parser, PM_LEX_STATE_ARG);
11345 }
11346 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11347 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11348 } else {
11349 lex_state_set(parser, PM_LEX_STATE_END);
11350 }
11351 }
11352
11353 if (
11354 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11355 (type == PM_TOKEN_IDENTIFIER) &&
11356 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11357 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
11358 ) {
11359 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11360 }
11361
11362 LEX(type);
11363 }
11364 }
11365 }
11366 case PM_LEX_LIST: {
11367 if (parser->next_start != NULL) {
11368 parser->current.end = parser->next_start;
11369 parser->next_start = NULL;
11370 }
11371
11372 // First we'll set the beginning of the token.
11373 parser->current.start = parser->current.end;
11374
11375 // If there's any whitespace at the start of the list, then we're
11376 // going to trim it off the beginning and create a new token.
11377 size_t whitespace;
11378
11379 if (parser->heredoc_end) {
11380 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11381 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11382 whitespace += 1;
11383 }
11384 } else {
11385 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11386 }
11387
11388 if (whitespace > 0) {
11389 parser->current.end += whitespace;
11390 if (peek_offset(parser, -1) == '\n') {
11391 // mutates next_start
11392 parser_flush_heredoc_end(parser);
11393 }
11394 LEX(PM_TOKEN_WORDS_SEP);
11395 }
11396
11397 // We'll check if we're at the end of the file. If we are, then we
11398 // need to return the EOF token.
11399 if (parser->current.end >= parser->end) {
11400 LEX(PM_TOKEN_EOF);
11401 }
11402
11403 // Here we'll get a list of the places where strpbrk should break,
11404 // and then find the first one.
11405 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11406 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11407 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11408
11409 // If we haven't found an escape yet, then this buffer will be
11410 // unallocated since we can refer directly to the source string.
11411 pm_token_buffer_t token_buffer = { 0 };
11412
11413 while (breakpoint != NULL) {
11414 // If we hit whitespace, then we must have received content by
11415 // now, so we can return an element of the list.
11416 if (pm_char_is_whitespace(*breakpoint)) {
11417 parser->current.end = breakpoint;
11418 pm_token_buffer_flush(parser, &token_buffer);
11419 LEX(PM_TOKEN_STRING_CONTENT);
11420 }
11421
11422 // If we hit the terminator, we need to check which token to
11423 // return.
11424 if (*breakpoint == lex_mode->as.list.terminator) {
11425 // If this terminator doesn't actually close the list, then
11426 // we need to continue on past it.
11427 if (lex_mode->as.list.nesting > 0) {
11428 parser->current.end = breakpoint + 1;
11429 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11430 lex_mode->as.list.nesting--;
11431 continue;
11432 }
11433
11434 // If we've hit the terminator and we've already skipped
11435 // past content, then we can return a list node.
11436 if (breakpoint > parser->current.start) {
11437 parser->current.end = breakpoint;
11438 pm_token_buffer_flush(parser, &token_buffer);
11439 LEX(PM_TOKEN_STRING_CONTENT);
11440 }
11441
11442 // Otherwise, switch back to the default state and return
11443 // the end of the list.
11444 parser->current.end = breakpoint + 1;
11445 lex_mode_pop(parser);
11446 lex_state_set(parser, PM_LEX_STATE_END);
11447 LEX(PM_TOKEN_STRING_END);
11448 }
11449
11450 // If we hit a null byte, skip directly past it.
11451 if (*breakpoint == '\0') {
11452 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11453 continue;
11454 }
11455
11456 // If we hit escapes, then we need to treat the next token
11457 // literally. In this case we'll skip past the next character
11458 // and find the next breakpoint.
11459 if (*breakpoint == '\\') {
11460 parser->current.end = breakpoint + 1;
11461
11462 // If we've hit the end of the file, then break out of the
11463 // loop by setting the breakpoint to NULL.
11464 if (parser->current.end == parser->end) {
11465 breakpoint = NULL;
11466 continue;
11467 }
11468
11469 pm_token_buffer_escape(parser, &token_buffer);
11470 uint8_t peeked = peek(parser);
11471
11472 switch (peeked) {
11473 case ' ':
11474 case '\f':
11475 case '\t':
11476 case '\v':
11477 case '\\':
11478 pm_token_buffer_push_byte(&token_buffer, peeked);
11479 parser->current.end++;
11480 break;
11481 case '\r':
11482 parser->current.end++;
11483 if (peek(parser) != '\n') {
11484 pm_token_buffer_push_byte(&token_buffer, '\r');
11485 break;
11486 }
11488 case '\n':
11489 pm_token_buffer_push_byte(&token_buffer, '\n');
11490
11491 if (parser->heredoc_end) {
11492 // ... if we are on the same line as a heredoc,
11493 // flush the heredoc and continue parsing after
11494 // heredoc_end.
11495 parser_flush_heredoc_end(parser);
11496 pm_token_buffer_copy(parser, &token_buffer);
11497 LEX(PM_TOKEN_STRING_CONTENT);
11498 } else {
11499 // ... else track the newline.
11500 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11501 }
11502
11503 parser->current.end++;
11504 break;
11505 default:
11506 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11507 pm_token_buffer_push_byte(&token_buffer, peeked);
11508 parser->current.end++;
11509 } else if (lex_mode->as.list.interpolation) {
11510 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11511 } else {
11512 pm_token_buffer_push_byte(&token_buffer, '\\');
11513 pm_token_buffer_push_escaped(&token_buffer, parser);
11514 }
11515
11516 break;
11517 }
11518
11519 token_buffer.cursor = parser->current.end;
11520 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11521 continue;
11522 }
11523
11524 // If we hit a #, then we will attempt to lex interpolation.
11525 if (*breakpoint == '#') {
11526 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11527
11528 if (!type) {
11529 // If we haven't returned at this point then we had something
11530 // that looked like an interpolated class or instance variable
11531 // like "#@" but wasn't actually. In this case we'll just skip
11532 // to the next breakpoint.
11533 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11534 continue;
11535 }
11536
11537 if (type == PM_TOKEN_STRING_CONTENT) {
11538 pm_token_buffer_flush(parser, &token_buffer);
11539 }
11540
11541 LEX(type);
11542 }
11543
11544 // If we've hit the incrementor, then we need to skip past it
11545 // and find the next breakpoint.
11546 assert(*breakpoint == lex_mode->as.list.incrementor);
11547 parser->current.end = breakpoint + 1;
11548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11549 lex_mode->as.list.nesting++;
11550 continue;
11551 }
11552
11553 if (parser->current.end > parser->current.start) {
11554 pm_token_buffer_flush(parser, &token_buffer);
11555 LEX(PM_TOKEN_STRING_CONTENT);
11556 }
11557
11558 // If we were unable to find a breakpoint, then this token hits the
11559 // end of the file.
11560 parser->current.end = parser->end;
11561 pm_token_buffer_flush(parser, &token_buffer);
11562 LEX(PM_TOKEN_STRING_CONTENT);
11563 }
11564 case PM_LEX_REGEXP: {
11565 // First, we'll set the start of this token to be the current end.
11566 if (parser->next_start == NULL) {
11567 parser->current.start = parser->current.end;
11568 } else {
11569 parser->current.start = parser->next_start;
11570 parser->current.end = parser->next_start;
11571 parser->next_start = NULL;
11572 }
11573
11574 // We'll check if we're at the end of the file. If we are, then we
11575 // need to return the EOF token.
11576 if (parser->current.end >= parser->end) {
11577 LEX(PM_TOKEN_EOF);
11578 }
11579
11580 // Get a reference to the current mode.
11581 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11582
11583 // These are the places where we need to split up the content of the
11584 // regular expression. We'll use strpbrk to find the first of these
11585 // characters.
11586 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11587 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11588 pm_regexp_token_buffer_t token_buffer = { 0 };
11589
11590 while (breakpoint != NULL) {
11591 uint8_t term = lex_mode->as.regexp.terminator;
11592 bool is_terminator = (*breakpoint == term);
11593
11594 // If the terminator is newline, we need to consider \r\n _also_ a newline
11595 // For example: `%r\nfoo\r\n`
11596 // The regexp should be /foo/, not /foo\r/
11597 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11598 if (term == '\n') {
11599 is_terminator = true;
11600 }
11601
11602 // If the terminator is a CR, but we see a CRLF, we need to
11603 // treat the CRLF as a newline, meaning this is _not_ the
11604 // terminator
11605 if (term == '\r') {
11606 is_terminator = false;
11607 }
11608 }
11609
11610 // If we hit the terminator, we need to determine what kind of
11611 // token to return.
11612 if (is_terminator) {
11613 if (lex_mode->as.regexp.nesting > 0) {
11614 parser->current.end = breakpoint + 1;
11615 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11616 lex_mode->as.regexp.nesting--;
11617 continue;
11618 }
11619
11620 // Here we've hit the terminator. If we have already consumed
11621 // content then we need to return that content as string content
11622 // first.
11623 if (breakpoint > parser->current.start) {
11624 parser->current.end = breakpoint;
11625 pm_regexp_token_buffer_flush(parser, &token_buffer);
11626 LEX(PM_TOKEN_STRING_CONTENT);
11627 }
11628
11629 // Check here if we need to track the newline.
11630 size_t eol_length = match_eol_at(parser, breakpoint);
11631 if (eol_length) {
11632 parser->current.end = breakpoint + eol_length;
11633
11634 // Track the newline if we're not in a heredoc that
11635 // would already have added the newline to the
11636 // list.
11637 if (parser->heredoc_end == NULL) {
11638 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11639 }
11640 } else {
11641 parser->current.end = breakpoint + 1;
11642 }
11643
11644 // Since we've hit the terminator of the regular expression,
11645 // we now need to parse the options.
11646 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11647
11648 lex_mode_pop(parser);
11649 lex_state_set(parser, PM_LEX_STATE_END);
11650 LEX(PM_TOKEN_REGEXP_END);
11651 }
11652
11653 // If we've hit the incrementor, then we need to skip past it
11654 // and find the next breakpoint.
11655 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11656 parser->current.end = breakpoint + 1;
11657 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11658 lex_mode->as.regexp.nesting++;
11659 continue;
11660 }
11661
11662 switch (*breakpoint) {
11663 case '\0':
11664 // If we hit a null byte, skip directly past it.
11665 parser->current.end = breakpoint + 1;
11666 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11667 break;
11668 case '\r':
11669 if (peek_at(parser, breakpoint + 1) != '\n') {
11670 parser->current.end = breakpoint + 1;
11671 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11672 break;
11673 }
11674
11675 breakpoint++;
11676 parser->current.end = breakpoint;
11677 pm_regexp_token_buffer_escape(parser, &token_buffer);
11678 token_buffer.base.cursor = breakpoint;
11679
11681 case '\n':
11682 // If we've hit a newline, then we need to track that in
11683 // the list of newlines.
11684 if (parser->heredoc_end == NULL) {
11685 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11686 parser->current.end = breakpoint + 1;
11687 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11688 break;
11689 }
11690
11691 parser->current.end = breakpoint + 1;
11692 parser_flush_heredoc_end(parser);
11693 pm_regexp_token_buffer_flush(parser, &token_buffer);
11694 LEX(PM_TOKEN_STRING_CONTENT);
11695 case '\\': {
11696 // If we hit escapes, then we need to treat the next
11697 // token literally. In this case we'll skip past the
11698 // next character and find the next breakpoint.
11699 parser->current.end = breakpoint + 1;
11700
11701 // If we've hit the end of the file, then break out of
11702 // the loop by setting the breakpoint to NULL.
11703 if (parser->current.end == parser->end) {
11704 breakpoint = NULL;
11705 break;
11706 }
11707
11708 pm_regexp_token_buffer_escape(parser, &token_buffer);
11709 uint8_t peeked = peek(parser);
11710
11711 switch (peeked) {
11712 case '\r':
11713 parser->current.end++;
11714 if (peek(parser) != '\n') {
11715 if (lex_mode->as.regexp.terminator != '\r') {
11716 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11717 }
11718 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11719 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11720 break;
11721 }
11723 case '\n':
11724 if (parser->heredoc_end) {
11725 // ... if we are on the same line as a heredoc,
11726 // flush the heredoc and continue parsing after
11727 // heredoc_end.
11728 parser_flush_heredoc_end(parser);
11729 pm_regexp_token_buffer_copy(parser, &token_buffer);
11730 LEX(PM_TOKEN_STRING_CONTENT);
11731 } else {
11732 // ... else track the newline.
11733 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11734 }
11735
11736 parser->current.end++;
11737 break;
11738 case 'c':
11739 case 'C':
11740 case 'M':
11741 case 'u':
11742 case 'x':
11743 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11744 break;
11745 default:
11746 if (lex_mode->as.regexp.terminator == peeked) {
11747 // Some characters when they are used as the
11748 // terminator also receive an escape. They are
11749 // enumerated here.
11750 switch (peeked) {
11751 case '$': case ')': case '*': case '+':
11752 case '.': case '>': case '?': case ']':
11753 case '^': case '|': case '}':
11754 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11755 break;
11756 default:
11757 break;
11758 }
11759
11760 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11761 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11762 parser->current.end++;
11763 break;
11764 }
11765
11766 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11767 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11768 break;
11769 }
11770
11771 token_buffer.base.cursor = parser->current.end;
11772 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11773 break;
11774 }
11775 case '#': {
11776 // If we hit a #, then we will attempt to lex
11777 // interpolation.
11778 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11779
11780 if (!type) {
11781 // If we haven't returned at this point then we had
11782 // something that looked like an interpolated class or
11783 // instance variable like "#@" but wasn't actually. In
11784 // this case we'll just skip to the next breakpoint.
11785 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11786 break;
11787 }
11788
11789 if (type == PM_TOKEN_STRING_CONTENT) {
11790 pm_regexp_token_buffer_flush(parser, &token_buffer);
11791 }
11792
11793 LEX(type);
11794 }
11795 default:
11796 assert(false && "unreachable");
11797 break;
11798 }
11799 }
11800
11801 if (parser->current.end > parser->current.start) {
11802 pm_regexp_token_buffer_flush(parser, &token_buffer);
11803 LEX(PM_TOKEN_STRING_CONTENT);
11804 }
11805
11806 // If we were unable to find a breakpoint, then this token hits the
11807 // end of the file.
11808 parser->current.end = parser->end;
11809 pm_regexp_token_buffer_flush(parser, &token_buffer);
11810 LEX(PM_TOKEN_STRING_CONTENT);
11811 }
11812 case PM_LEX_STRING: {
11813 // First, we'll set the start of this token to be the current end.
11814 if (parser->next_start == NULL) {
11815 parser->current.start = parser->current.end;
11816 } else {
11817 parser->current.start = parser->next_start;
11818 parser->current.end = parser->next_start;
11819 parser->next_start = NULL;
11820 }
11821
11822 // We'll check if we're at the end of the file. If we are, then we need to
11823 // return the EOF token.
11824 if (parser->current.end >= parser->end) {
11825 LEX(PM_TOKEN_EOF);
11826 }
11827
11828 // These are the places where we need to split up the content of the
11829 // string. We'll use strpbrk to find the first of these characters.
11830 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11831 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11832 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11833
11834 // If we haven't found an escape yet, then this buffer will be
11835 // unallocated since we can refer directly to the source string.
11836 pm_token_buffer_t token_buffer = { 0 };
11837
11838 while (breakpoint != NULL) {
11839 // If we hit the incrementor, then we'll increment the nesting and
11840 // continue lexing.
11841 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11842 lex_mode->as.string.nesting++;
11843 parser->current.end = breakpoint + 1;
11844 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11845 continue;
11846 }
11847
11848 uint8_t term = lex_mode->as.string.terminator;
11849 bool is_terminator = (*breakpoint == term);
11850
11851 // If the terminator is newline, we need to consider \r\n _also_ a newline
11852 // For example: `%\nfoo\r\n`
11853 // The string should be "foo", not "foo\r"
11854 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11855 if (term == '\n') {
11856 is_terminator = true;
11857 }
11858
11859 // If the terminator is a CR, but we see a CRLF, we need to
11860 // treat the CRLF as a newline, meaning this is _not_ the
11861 // terminator
11862 if (term == '\r') {
11863 is_terminator = false;
11864 }
11865 }
11866
11867 // Note that we have to check the terminator here first because we could
11868 // potentially be parsing a % string that has a # character as the
11869 // terminator.
11870 if (is_terminator) {
11871 // If this terminator doesn't actually close the string, then we need
11872 // to continue on past it.
11873 if (lex_mode->as.string.nesting > 0) {
11874 parser->current.end = breakpoint + 1;
11875 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11876 lex_mode->as.string.nesting--;
11877 continue;
11878 }
11879
11880 // Here we've hit the terminator. If we have already consumed content
11881 // then we need to return that content as string content first.
11882 if (breakpoint > parser->current.start) {
11883 parser->current.end = breakpoint;
11884 pm_token_buffer_flush(parser, &token_buffer);
11885 LEX(PM_TOKEN_STRING_CONTENT);
11886 }
11887
11888 // Otherwise we need to switch back to the parent lex mode and
11889 // return the end of the string.
11890 size_t eol_length = match_eol_at(parser, breakpoint);
11891 if (eol_length) {
11892 parser->current.end = breakpoint + eol_length;
11893
11894 // Track the newline if we're not in a heredoc that
11895 // would already have added the newline to the
11896 // list.
11897 if (parser->heredoc_end == NULL) {
11898 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11899 }
11900 } else {
11901 parser->current.end = breakpoint + 1;
11902 }
11903
11904 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11905 parser->current.end++;
11906 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11907 lex_mode_pop(parser);
11908 LEX(PM_TOKEN_LABEL_END);
11909 }
11910
11911 // When the delimiter itself is a newline, we won't
11912 // get a chance to flush heredocs in the usual places since
11913 // the newline is already consumed.
11914 if (term == '\n' && parser->heredoc_end) {
11915 parser_flush_heredoc_end(parser);
11916 }
11917
11918 lex_state_set(parser, PM_LEX_STATE_END);
11919 lex_mode_pop(parser);
11920 LEX(PM_TOKEN_STRING_END);
11921 }
11922
11923 switch (*breakpoint) {
11924 case '\0':
11925 // Skip directly past the null character.
11926 parser->current.end = breakpoint + 1;
11927 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11928 break;
11929 case '\r':
11930 if (peek_at(parser, breakpoint + 1) != '\n') {
11931 parser->current.end = breakpoint + 1;
11932 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11933 break;
11934 }
11935
11936 // If we hit a \r\n sequence, then we need to treat it
11937 // as a newline.
11938 breakpoint++;
11939 parser->current.end = breakpoint;
11940 pm_token_buffer_escape(parser, &token_buffer);
11941 token_buffer.cursor = breakpoint;
11942
11944 case '\n':
11945 // When we hit a newline, we need to flush any potential
11946 // heredocs. Note that this has to happen after we check
11947 // for the terminator in case the terminator is a
11948 // newline character.
11949 if (parser->heredoc_end == NULL) {
11950 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11951 parser->current.end = breakpoint + 1;
11952 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11953 break;
11954 }
11955
11956 parser->current.end = breakpoint + 1;
11957 parser_flush_heredoc_end(parser);
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 case '\\': {
11961 // Here we hit escapes.
11962 parser->current.end = breakpoint + 1;
11963
11964 // If we've hit the end of the file, then break out of
11965 // the loop by setting the breakpoint to NULL.
11966 if (parser->current.end == parser->end) {
11967 breakpoint = NULL;
11968 continue;
11969 }
11970
11971 pm_token_buffer_escape(parser, &token_buffer);
11972 uint8_t peeked = peek(parser);
11973
11974 switch (peeked) {
11975 case '\\':
11976 pm_token_buffer_push_byte(&token_buffer, '\\');
11977 parser->current.end++;
11978 break;
11979 case '\r':
11980 parser->current.end++;
11981 if (peek(parser) != '\n') {
11982 if (!lex_mode->as.string.interpolation) {
11983 pm_token_buffer_push_byte(&token_buffer, '\\');
11984 }
11985 pm_token_buffer_push_byte(&token_buffer, '\r');
11986 break;
11987 }
11989 case '\n':
11990 if (!lex_mode->as.string.interpolation) {
11991 pm_token_buffer_push_byte(&token_buffer, '\\');
11992 pm_token_buffer_push_byte(&token_buffer, '\n');
11993 }
11994
11995 if (parser->heredoc_end) {
11996 // ... if we are on the same line as a heredoc,
11997 // flush the heredoc and continue parsing after
11998 // heredoc_end.
11999 parser_flush_heredoc_end(parser);
12000 pm_token_buffer_copy(parser, &token_buffer);
12001 LEX(PM_TOKEN_STRING_CONTENT);
12002 } else {
12003 // ... else track the newline.
12004 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
12005 }
12006
12007 parser->current.end++;
12008 break;
12009 default:
12010 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12011 pm_token_buffer_push_byte(&token_buffer, peeked);
12012 parser->current.end++;
12013 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12014 pm_token_buffer_push_byte(&token_buffer, peeked);
12015 parser->current.end++;
12016 } else if (lex_mode->as.string.interpolation) {
12017 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12018 } else {
12019 pm_token_buffer_push_byte(&token_buffer, '\\');
12020 pm_token_buffer_push_escaped(&token_buffer, parser);
12021 }
12022
12023 break;
12024 }
12025
12026 token_buffer.cursor = parser->current.end;
12027 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12028 break;
12029 }
12030 case '#': {
12031 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12032
12033 if (!type) {
12034 // If we haven't returned at this point then we had something that
12035 // looked like an interpolated class or instance variable like "#@"
12036 // but wasn't actually. In this case we'll just skip to the next
12037 // breakpoint.
12038 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12039 break;
12040 }
12041
12042 if (type == PM_TOKEN_STRING_CONTENT) {
12043 pm_token_buffer_flush(parser, &token_buffer);
12044 }
12045
12046 LEX(type);
12047 }
12048 default:
12049 assert(false && "unreachable");
12050 }
12051 }
12052
12053 if (parser->current.end > parser->current.start) {
12054 pm_token_buffer_flush(parser, &token_buffer);
12055 LEX(PM_TOKEN_STRING_CONTENT);
12056 }
12057
12058 // If we've hit the end of the string, then this is an unterminated
12059 // string. In that case we'll return a string content token.
12060 parser->current.end = parser->end;
12061 pm_token_buffer_flush(parser, &token_buffer);
12062 LEX(PM_TOKEN_STRING_CONTENT);
12063 }
12064 case PM_LEX_HEREDOC: {
12065 // First, we'll set the start of this token.
12066 if (parser->next_start == NULL) {
12067 parser->current.start = parser->current.end;
12068 } else {
12069 parser->current.start = parser->next_start;
12070 parser->current.end = parser->next_start;
12071 parser->heredoc_end = NULL;
12072 parser->next_start = NULL;
12073 }
12074
12075 // Now let's grab the information about the identifier off of the
12076 // current lex mode.
12077 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12078 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12079
12080 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12081 lex_mode->as.heredoc.line_continuation = false;
12082
12083 // We'll check if we're at the end of the file. If we are, then we
12084 // will add an error (because we weren't able to find the
12085 // terminator) but still continue parsing so that content after the
12086 // declaration of the heredoc can be parsed.
12087 if (parser->current.end >= parser->end) {
12088 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12089 parser->next_start = lex_mode->as.heredoc.next_start;
12090 parser->heredoc_end = parser->current.end;
12091 lex_state_set(parser, PM_LEX_STATE_END);
12092 lex_mode_pop(parser);
12093 LEX(PM_TOKEN_HEREDOC_END);
12094 }
12095
12096 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12097 size_t ident_length = heredoc_lex_mode->ident_length;
12098
12099 // If we are immediately following a newline and we have hit the
12100 // terminator, then we need to return the ending of the heredoc.
12101 if (current_token_starts_line(parser)) {
12102 const uint8_t *start = parser->current.start;
12103
12104 if (!line_continuation && (start + ident_length <= parser->end)) {
12105 const uint8_t *newline = next_newline(start, parser->end - start);
12106 const uint8_t *ident_end = newline;
12107 const uint8_t *terminator_end = newline;
12108
12109 if (newline == NULL) {
12110 terminator_end = parser->end;
12111 ident_end = parser->end;
12112 } else {
12113 terminator_end++;
12114 if (newline[-1] == '\r') {
12115 ident_end--; // Remove \r
12116 }
12117 }
12118
12119 const uint8_t *terminator_start = ident_end - ident_length;
12120 const uint8_t *cursor = start;
12121
12122 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12123 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12124 cursor++;
12125 }
12126 }
12127
12128 if (
12129 (cursor == terminator_start) &&
12130 (memcmp(terminator_start, ident_start, ident_length) == 0)
12131 ) {
12132 if (newline != NULL) {
12133 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
12134 }
12135
12136 parser->current.end = terminator_end;
12137 if (*lex_mode->as.heredoc.next_start == '\\') {
12138 parser->next_start = NULL;
12139 } else {
12140 parser->next_start = lex_mode->as.heredoc.next_start;
12141 parser->heredoc_end = parser->current.end;
12142 }
12143
12144 lex_state_set(parser, PM_LEX_STATE_END);
12145 lex_mode_pop(parser);
12146 LEX(PM_TOKEN_HEREDOC_END);
12147 }
12148 }
12149
12150 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12151 if (
12152 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12153 lex_mode->as.heredoc.common_whitespace != NULL &&
12154 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12155 peek_at(parser, start) != '\n'
12156 ) {
12157 *lex_mode->as.heredoc.common_whitespace = whitespace;
12158 }
12159 }
12160
12161 // Otherwise we'll be parsing string content. These are the places
12162 // where we need to split up the content of the heredoc. We'll use
12163 // strpbrk to find the first of these characters.
12164 uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
12165
12166 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12167 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12168 breakpoints[3] = '\0';
12169 }
12170
12171 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12172 pm_token_buffer_t token_buffer = { 0 };
12173 bool was_line_continuation = false;
12174
12175 while (breakpoint != NULL) {
12176 switch (*breakpoint) {
12177 case '\0':
12178 // Skip directly past the null character.
12179 parser->current.end = breakpoint + 1;
12180 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12181 break;
12182 case '\r':
12183 parser->current.end = breakpoint + 1;
12184
12185 if (peek_at(parser, breakpoint + 1) != '\n') {
12186 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12187 break;
12188 }
12189
12190 // If we hit a \r\n sequence, then we want to replace it
12191 // with a single \n character in the final string.
12192 breakpoint++;
12193 pm_token_buffer_escape(parser, &token_buffer);
12194 token_buffer.cursor = breakpoint;
12195
12197 case '\n': {
12198 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12199 parser_flush_heredoc_end(parser);
12200 parser->current.end = breakpoint + 1;
12201 pm_token_buffer_flush(parser, &token_buffer);
12202 LEX(PM_TOKEN_STRING_CONTENT);
12203 }
12204
12205 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
12206
12207 // If we have a - or ~ heredoc, then we can match after
12208 // some leading whitespace.
12209 const uint8_t *start = breakpoint + 1;
12210
12211 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12212 // We want to match the terminator starting from the end of the line in case
12213 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12214 const uint8_t *newline = next_newline(start, parser->end - start);
12215
12216 if (newline == NULL) {
12217 newline = parser->end;
12218 } else if (newline[-1] == '\r') {
12219 newline--; // Remove \r
12220 }
12221
12222 // Start of a possible terminator.
12223 const uint8_t *terminator_start = newline - ident_length;
12224
12225 // Cursor to check for the leading whitespace. We skip the
12226 // leading whitespace if we have a - or ~ heredoc.
12227 const uint8_t *cursor = start;
12228
12229 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12230 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12231 cursor++;
12232 }
12233 }
12234
12235 if (
12236 cursor == terminator_start &&
12237 (memcmp(terminator_start, ident_start, ident_length) == 0)
12238 ) {
12239 parser->current.end = breakpoint + 1;
12240 pm_token_buffer_flush(parser, &token_buffer);
12241 LEX(PM_TOKEN_STRING_CONTENT);
12242 }
12243 }
12244
12245 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12246
12247 // If we have hit a newline that is followed by a valid
12248 // terminator, then we need to return the content of the
12249 // heredoc here as string content. Then, the next time a
12250 // token is lexed, it will match again and return the
12251 // end of the heredoc.
12252 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12253 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12254 *lex_mode->as.heredoc.common_whitespace = whitespace;
12255 }
12256
12257 parser->current.end = breakpoint + 1;
12258 pm_token_buffer_flush(parser, &token_buffer);
12259 LEX(PM_TOKEN_STRING_CONTENT);
12260 }
12261
12262 // Otherwise we hit a newline and it wasn't followed by
12263 // a terminator, so we can continue parsing.
12264 parser->current.end = breakpoint + 1;
12265 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12266 break;
12267 }
12268 case '\\': {
12269 // If we hit an escape, then we need to skip past
12270 // however many characters the escape takes up. However
12271 // it's important that if \n or \r\n are escaped, we
12272 // stop looping before the newline and not after the
12273 // newline so that we can still potentially find the
12274 // terminator of the heredoc.
12275 parser->current.end = breakpoint + 1;
12276
12277 // If we've hit the end of the file, then break out of
12278 // the loop by setting the breakpoint to NULL.
12279 if (parser->current.end == parser->end) {
12280 breakpoint = NULL;
12281 continue;
12282 }
12283
12284 pm_token_buffer_escape(parser, &token_buffer);
12285 uint8_t peeked = peek(parser);
12286
12287 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12288 switch (peeked) {
12289 case '\r':
12290 parser->current.end++;
12291 if (peek(parser) != '\n') {
12292 pm_token_buffer_push_byte(&token_buffer, '\\');
12293 pm_token_buffer_push_byte(&token_buffer, '\r');
12294 break;
12295 }
12297 case '\n':
12298 pm_token_buffer_push_byte(&token_buffer, '\\');
12299 pm_token_buffer_push_byte(&token_buffer, '\n');
12300 token_buffer.cursor = parser->current.end + 1;
12301 breakpoint = parser->current.end;
12302 continue;
12303 default:
12304 pm_token_buffer_push_byte(&token_buffer, '\\');
12305 pm_token_buffer_push_escaped(&token_buffer, parser);
12306 break;
12307 }
12308 } else {
12309 switch (peeked) {
12310 case '\r':
12311 parser->current.end++;
12312 if (peek(parser) != '\n') {
12313 pm_token_buffer_push_byte(&token_buffer, '\r');
12314 break;
12315 }
12317 case '\n':
12318 // If we are in a tilde here, we should
12319 // break out of the loop and return the
12320 // string content.
12321 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12322 const uint8_t *end = parser->current.end;
12323
12324 if (parser->heredoc_end == NULL) {
12325 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
12326 }
12327
12328 // Here we want the buffer to only
12329 // include up to the backslash.
12330 parser->current.end = breakpoint;
12331 pm_token_buffer_flush(parser, &token_buffer);
12332
12333 // Now we can advance the end of the
12334 // token past the newline.
12335 parser->current.end = end + 1;
12336 lex_mode->as.heredoc.line_continuation = true;
12337 LEX(PM_TOKEN_STRING_CONTENT);
12338 }
12339
12340 was_line_continuation = true;
12341 token_buffer.cursor = parser->current.end + 1;
12342 breakpoint = parser->current.end;
12343 continue;
12344 default:
12345 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12346 break;
12347 }
12348 }
12349
12350 token_buffer.cursor = parser->current.end;
12351 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12352 break;
12353 }
12354 case '#': {
12355 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12356
12357 if (!type) {
12358 // If we haven't returned at this point then we had
12359 // something that looked like an interpolated class
12360 // or instance variable like "#@" but wasn't
12361 // actually. In this case we'll just skip to the
12362 // next breakpoint.
12363 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12364 break;
12365 }
12366
12367 if (type == PM_TOKEN_STRING_CONTENT) {
12368 pm_token_buffer_flush(parser, &token_buffer);
12369 }
12370
12371 LEX(type);
12372 }
12373 default:
12374 assert(false && "unreachable");
12375 }
12376
12377 was_line_continuation = false;
12378 }
12379
12380 if (parser->current.end > parser->current.start) {
12381 parser->current.end = parser->end;
12382 pm_token_buffer_flush(parser, &token_buffer);
12383 LEX(PM_TOKEN_STRING_CONTENT);
12384 }
12385
12386 // If we've hit the end of the string, then this is an unterminated
12387 // heredoc. In that case we'll return a string content token.
12388 parser->current.end = parser->end;
12389 pm_token_buffer_flush(parser, &token_buffer);
12390 LEX(PM_TOKEN_STRING_CONTENT);
12391 }
12392 }
12393
12394 assert(false && "unreachable");
12395}
12396
12397#undef LEX
12398
12399/******************************************************************************/
12400/* Parse functions */
12401/******************************************************************************/
12402
/**
 * The binding powers used when parsing expressions, listed from the loosest
 * binding (statements) to the tightest (calls). Every named level is an even
 * number, which leaves the odd value directly above it available for a
 * "level + 1" right binding power.
 */
typedef enum {
    /** Indicates that a token cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET = 0,

    /** Statement level. */
    PM_BINDING_POWER_STATEMENT = 2,

    /** rescue */
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    /** if unless until while */
    PM_BINDING_POWER_MODIFIER = 6,

    /** and or */
    PM_BINDING_POWER_COMPOSITION = 8,

    /** not */
    PM_BINDING_POWER_NOT = 10,

    /** => in */
    PM_BINDING_POWER_MATCH = 12,

    /** defined? */
    PM_BINDING_POWER_DEFINED = 14,

    /** = */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    /** = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_ASSIGNMENT = 18,

    /** ?: */
    PM_BINDING_POWER_TERNARY = 20,

    /** .. ... */
    PM_BINDING_POWER_RANGE = 22,

    /** || */
    PM_BINDING_POWER_LOGICAL_OR = 24,

    /** && */
    PM_BINDING_POWER_LOGICAL_AND = 26,

    /** <=> == === != =~ !~ */
    PM_BINDING_POWER_EQUALITY = 28,

    /** > >= < <= */
    PM_BINDING_POWER_COMPARISON = 30,

    /** | ^ */
    PM_BINDING_POWER_BITWISE_OR = 32,

    /** & */
    PM_BINDING_POWER_BITWISE_AND = 34,

    /** << >> */
    PM_BINDING_POWER_SHIFT = 36,

    /** + - */
    PM_BINDING_POWER_TERM = 38,

    /** * / % */
    PM_BINDING_POWER_FACTOR = 40,

    /** -@ */
    PM_BINDING_POWER_UMINUS = 42,

    /** ** */
    PM_BINDING_POWER_EXPONENT = 44,

    /** ! ~ +@ */
    PM_BINDING_POWER_UNARY = 46,

    /** [] []= */
    PM_BINDING_POWER_INDEX = 48,

    /** :: . */
    PM_BINDING_POWER_CALL = 50,

    /** One level above every operator. */
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12440
12445typedef struct {
12447 pm_binding_power_t left;
12448
12450 pm_binding_power_t right;
12451
12454
12461
12462#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
12463#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
12464#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
12465#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
12466#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
12467
12468pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12469 // rescue
12470 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12471
12472 // if unless until while
12473 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12474 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12475 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12476 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12477
12478 // and or
12479 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12480 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12481
12482 // => in
12483 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12484 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12485
12486 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12487 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12488 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12489 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12490 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12491 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12492 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12493 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12494 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12495 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12496 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12497 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12498 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12499 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12500 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12501
12502 // ?:
12503 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12504
12505 // .. ...
12506 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12507 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12508 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12509 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12510
12511 // ||
12512 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12513
12514 // &&
12515 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12516
12517 // != !~ == === =~ <=>
12518 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12519 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12520 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12521 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12522 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12523 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12524
12525 // > >= < <=
12526 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12527 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12528 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12529 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12530
12531 // ^ |
12532 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12533 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12534
12535 // &
12536 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12537
12538 // >> <<
12539 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12540 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12541
12542 // - +
12543 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12544 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12545
12546 // % / *
12547 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12548 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12549 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12550 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12551
12552 // -@
12553 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12554 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12555
12556 // **
12557 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12558 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12559
12560 // ! ~ +@
12561 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12562 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12563 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12564
12565 // [
12566 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12567
12568 // :: . &.
12569 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12570 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12571 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12572};
12573
12574#undef BINDING_POWER_ASSIGNMENT
12575#undef LEFT_ASSOCIATIVE
12576#undef RIGHT_ASSOCIATIVE
12577#undef RIGHT_ASSOCIATIVE_UNARY
12578
12582static PRISM_INLINE bool
12583match1(const pm_parser_t *parser, pm_token_type_t type) {
12584 return parser->current.type == type;
12585}
12586
12590static PRISM_INLINE bool
12591match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12592 return match1(parser, type1) || match1(parser, type2);
12593}
12594
12598static PRISM_INLINE bool
12599match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12600 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12601}
12602
12606static PRISM_INLINE bool
12607match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12608 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12609}
12610
12614static PRISM_INLINE bool
12615match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
12616 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
12617}
12618
12622static PRISM_INLINE bool
12623match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12624 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12625}
12626
12630static PRISM_INLINE bool
12631match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12632 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12633}
12634
12641static bool
12642accept1(pm_parser_t *parser, pm_token_type_t type) {
12643 if (match1(parser, type)) {
12644 parser_lex(parser);
12645 return true;
12646 }
12647 return false;
12648}
12649
12654static PRISM_INLINE bool
12655accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12656 if (match2(parser, type1, type2)) {
12657 parser_lex(parser);
12658 return true;
12659 }
12660 return false;
12661}
12662
12674static void
12675expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12676 if (accept1(parser, type)) return;
12677
12678 const uint8_t *location = parser->previous.end;
12679 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12680
12681 parser->previous.start = location;
12682 parser->previous.type = 0;
12683}
12684
12689static void
12690expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12691 if (accept2(parser, type1, type2)) return;
12692
12693 const uint8_t *location = parser->previous.end;
12694 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12695
12696 parser->previous.start = location;
12697 parser->previous.type = 0;
12698}
12699
12704static void
12705expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12706 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12707 parser_lex(parser);
12708 } else {
12709 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12710 parser->previous.start = parser->previous.end;
12711 parser->previous.type = 0;
12712 }
12713}
12714
12721static void
12722expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12723 if (accept1(parser, type)) return;
12724
12725 const uint8_t *start = opening->start;
12726 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12727
12728 parser->previous.start = parser->previous.end;
12729 parser->previous.type = 0;
12730}
12731
/*
 * Bit flags that are combined into the uint8_t `flags` argument taken by the
 * expression parsing functions. Each flag occupies its own bit so that they
 * can be tested and masked independently.
 */
#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) (1U << 0))
#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) (1U << 1))
#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) (1U << 2))
#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) (1U << 3))
12737
12738static pm_node_t *
12739parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
12740
12745static pm_node_t *
12746parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12747 pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
12748 pm_assert_value_expression(parser, node);
12749 return node;
12750}
12751
12770static PRISM_INLINE bool
12771token_begins_expression_p(pm_token_type_t type) {
12772 switch (type) {
12773 case PM_TOKEN_EQUAL_GREATER:
12774 case PM_TOKEN_KEYWORD_IN:
12775 // We need to special case this because it is a binary operator that
12776 // should not be marked as beginning an expression.
12777 return false;
12778 case PM_TOKEN_BRACE_RIGHT:
12779 case PM_TOKEN_BRACKET_RIGHT:
12780 case PM_TOKEN_COLON:
12781 case PM_TOKEN_COMMA:
12782 case PM_TOKEN_EMBEXPR_END:
12783 case PM_TOKEN_EOF:
12784 case PM_TOKEN_LAMBDA_BEGIN:
12785 case PM_TOKEN_KEYWORD_DO:
12786 case PM_TOKEN_KEYWORD_DO_BLOCK:
12787 case PM_TOKEN_KEYWORD_DO_LOOP:
12788 case PM_TOKEN_KEYWORD_END:
12789 case PM_TOKEN_KEYWORD_ELSE:
12790 case PM_TOKEN_KEYWORD_ELSIF:
12791 case PM_TOKEN_KEYWORD_ENSURE:
12792 case PM_TOKEN_KEYWORD_THEN:
12793 case PM_TOKEN_KEYWORD_RESCUE:
12794 case PM_TOKEN_KEYWORD_WHEN:
12795 case PM_TOKEN_NEWLINE:
12796 case PM_TOKEN_PARENTHESIS_RIGHT:
12797 case PM_TOKEN_SEMICOLON:
12798 // The reason we need this short-circuit is because we're using the
12799 // binding powers table to tell us if the subsequent token could
12800 // potentially be the start of an expression. If there _is_ a binding
12801 // power for one of these tokens, then we should remove it from this list
12802 // and let it be handled by the default case below.
12803 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12804 return false;
12805 case PM_TOKEN_UAMPERSAND:
12806 // This is a special case because this unary operator cannot appear
12807 // as a general operator, it only appears in certain circumstances.
12808 return false;
12809 case PM_TOKEN_UCOLON_COLON:
12810 case PM_TOKEN_UMINUS:
12811 case PM_TOKEN_UMINUS_NUM:
12812 case PM_TOKEN_UPLUS:
12813 case PM_TOKEN_BANG:
12814 case PM_TOKEN_TILDE:
12815 case PM_TOKEN_UDOT_DOT:
12816 case PM_TOKEN_UDOT_DOT_DOT:
12817 // These unary tokens actually do have binding power associated with them
12818 // so that we can correctly place them into the precedence order. But we
12819 // want them to be marked as beginning an expression, so we need to
12820 // special case them here.
12821 return true;
12822 default:
12823 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12824 }
12825}
12826
12831static pm_node_t *
12832parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12833 if (accept1(parser, PM_TOKEN_USTAR)) {
12834 pm_token_t operator = parser->previous;
12835 pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12836 return UP(pm_splat_node_create(parser, &operator, expression));
12837 }
12838
12839 return parse_value_expression(parser, binding_power, flags, diag_id, depth);
12840}
12841
12842static bool
12843pm_node_unreference_each(const pm_node_t *node, void *data) {
12844 switch (PM_NODE_TYPE(node)) {
12845 /* When we are about to destroy a set of nodes that could potentially
12846 * contain block exits for the current scope, we need to check if they
12847 * are contained in the list of block exits and remove them if they are.
12848 */
12849 case PM_BREAK_NODE:
12850 case PM_NEXT_NODE:
12851 case PM_REDO_NODE: {
12852 pm_parser_t *parser = (pm_parser_t *) data;
12853 size_t index = 0;
12854
12855 while (index < parser->current_block_exits->size) {
12856 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12857
12858 if (block_exit == node) {
12859 if (index + 1 < parser->current_block_exits->size) {
12860 memmove(
12861 &parser->current_block_exits->nodes[index],
12862 &parser->current_block_exits->nodes[index + 1],
12863 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12864 );
12865 }
12866 parser->current_block_exits->size--;
12867
12868 /* Note returning true here because these nodes could have
12869 * arguments that are themselves block exits. */
12870 return true;
12871 }
12872
12873 index++;
12874 }
12875
12876 return true;
12877 }
12878 /* When an implicit local variable is written to or targeted, it becomes
12879 * a regular, named local variable. This branch removes it from the list
12880 * of implicit parameters when that happens. */
12881 case PM_LOCAL_VARIABLE_READ_NODE:
12882 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12883 pm_parser_t *parser = (pm_parser_t *) data;
12884 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12885
12886 for (size_t index = 0; index < implicit_parameters->size; index++) {
12887 if (implicit_parameters->nodes[index] == node) {
12888 /* If the node is not the last one in the list, we need to
12889 * shift the remaining nodes down to fill the gap. This is
12890 * extremely unlikely to happen. */
12891 if (index != implicit_parameters->size - 1) {
12892 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12893 }
12894
12895 implicit_parameters->size--;
12896 break;
12897 }
12898 }
12899
12900 return false;
12901 }
12902 default:
12903 return true;
12904 }
12905}
12906
12912static void
12913pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12914 pm_visit_node(node, pm_node_unreference_each, parser);
12915}
12916
12921static void
12922parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12923 // The method name needs to change. If we previously had
12924 // foo, we now need foo=. In this case we'll allocate a new
12925 // owned string, copy the previous method name in, and
12926 // append an =.
12927 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12928 size_t length = constant->length;
12929 uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
12930
12931 memcpy(name, constant->start, length);
12932 name[length] = '=';
12933
12934 *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
12935}
12936
12943static pm_node_t *
12944parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12945 switch (PM_NODE_TYPE(target)) {
12946 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12947 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12948 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12949 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12950 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12951 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12952 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12953 default: break;
12954 }
12955
12956 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12957 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12958
12959 return UP(result);
12960}
12961
12970static pm_node_t *
12971parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12972 switch (PM_NODE_TYPE(target)) {
12973 case PM_ERROR_RECOVERY_NODE:
12974 return target;
12975 case PM_SOURCE_ENCODING_NODE:
12976 case PM_FALSE_NODE:
12977 case PM_SOURCE_FILE_NODE:
12978 case PM_SOURCE_LINE_NODE:
12979 case PM_NIL_NODE:
12980 case PM_SELF_NODE:
12981 case PM_TRUE_NODE: {
12982 // In these special cases, we have specific error messages and we
12983 // will replace them with local variable writes.
12984 return parse_unwriteable_target(parser, target);
12985 }
12986 case PM_CLASS_VARIABLE_READ_NODE:
12988 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12989 return target;
12990 case PM_CONSTANT_PATH_NODE:
12991 if (context_def_p(parser)) {
12992 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12993 }
12994
12996 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12997
12998 return target;
12999 case PM_CONSTANT_READ_NODE:
13000 if (context_def_p(parser)) {
13001 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13002 }
13003
13004 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13005 target->type = PM_CONSTANT_TARGET_NODE;
13006
13007 return target;
13008 case PM_BACK_REFERENCE_READ_NODE:
13009 case PM_NUMBERED_REFERENCE_READ_NODE:
13010 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13011 return UP(pm_error_recovery_node_create_unexpected(parser, target));
13012 case PM_GLOBAL_VARIABLE_READ_NODE:
13014 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13015 return target;
13016 case PM_LOCAL_VARIABLE_READ_NODE: {
13017 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13018 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
13019 pm_node_unreference(parser, target);
13020 }
13021
13022 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13023 uint32_t name = cast->name;
13024 uint32_t depth = cast->depth;
13025 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13026
13028 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13029
13030 return target;
13031 }
13032 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13033 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13034 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
13035
13036 pm_node_unreference(parser, target);
13037
13038 return node;
13039 }
13040 case PM_INSTANCE_VARIABLE_READ_NODE:
13042 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13043 return target;
13044 case PM_MULTI_TARGET_NODE:
13045 if (splat_parent) {
13046 // Multi target is not accepted in all positions. If this is one
13047 // of them, then we need to add an error.
13048 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13049 }
13050
13051 return target;
13052 case PM_SPLAT_NODE: {
13053 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13054
13055 if (splat->expression != NULL) {
13056 splat->expression = parse_target(parser, splat->expression, multiple, true);
13057 }
13058
13059 return UP(splat);
13060 }
13061 case PM_CALL_NODE: {
13062 pm_call_node_t *call = (pm_call_node_t *) target;
13063
13064 // If we have no arguments to the call node and we need this to be a
13065 // target then this is either a method call or a local variable
13066 // write.
13067 if (
13068 (call->message_loc.length > 0) &&
13069 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13070 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13071 (call->opening_loc.length == 0) &&
13072 (call->arguments == NULL) &&
13073 (call->block == NULL)
13074 ) {
13075 if (call->receiver == NULL) {
13076 // When we get here, we have a local variable write, because it
13077 // was previously marked as a method call but now we have an =.
13078 // This looks like:
13079 //
13080 // foo = 1
13081 //
13082 // When it was parsed in the prefix position, foo was seen as a
13083 // method call with no receiver and no arguments. Now we have an
13084 // =, so we know it's a local variable write.
13085 pm_location_t message_loc = call->message_loc;
13086 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
13087
13088 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
13089 }
13090
13091 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13092 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13093 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13094 }
13095
13096 parse_write_name(parser, &call->name);
13097 return UP(pm_call_target_node_create(parser, call));
13098 }
13099 }
13100
13101 // If there is no call operator and the message is "[]" then this is
13102 // an aref expression, and we can transform it into an aset
13103 // expression.
13104 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13105 return UP(pm_index_target_node_create(parser, call));
13106 }
13107 }
13109 default:
13110 // In this case we have a node that we don't know how to convert
13111 // into a target. We need to treat it as an error. For now, we'll
13112 // mark it as an error and just skip right past it.
13113 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13114 return target;
13115 }
13116}
13117
13122static pm_node_t *
13123parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13124 pm_node_t *result = parse_target(parser, target, multiple, false);
13125
13126 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13127 // parens after the targets.
13128 if (
13129 !match1(parser, PM_TOKEN_EQUAL) &&
13130 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13131 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13132 ) {
13133 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13134 }
13135
13136 return result;
13137}
13138
13143static pm_node_t *
13144parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13145 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13146
13147 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13148 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
13149 }
13150
13151 return write;
13152}
13153
13157static pm_node_t *
13158parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13159 switch (PM_NODE_TYPE(target)) {
13160 case PM_ERROR_RECOVERY_NODE:
13161 return target;
13162 case PM_CLASS_VARIABLE_READ_NODE: {
13163 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13164 return UP(node);
13165 }
13166 case PM_CONSTANT_PATH_NODE: {
13167 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
13168
13169 if (context_def_p(parser)) {
13170 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13171 }
13172
13173 return parse_shareable_constant_write(parser, node);
13174 }
13175 case PM_CONSTANT_READ_NODE: {
13176 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
13177
13178 if (context_def_p(parser)) {
13179 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13180 }
13181
13182 return parse_shareable_constant_write(parser, node);
13183 }
13184 case PM_BACK_REFERENCE_READ_NODE:
13185 case PM_NUMBERED_REFERENCE_READ_NODE:
13186 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13188 case PM_GLOBAL_VARIABLE_READ_NODE: {
13189 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13190 return UP(node);
13191 }
13192 case PM_LOCAL_VARIABLE_READ_NODE: {
13194
13195 pm_location_t location = target->location;
13196 pm_constant_id_t name = local_read->name;
13197 uint32_t depth = local_read->depth;
13198 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13199
13200 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13201 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13202 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
13203 pm_node_unreference(parser, target);
13204 }
13205
13206 pm_locals_unread(&scope->locals, name);
13207
13208 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
13209 }
13210 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13211 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13212 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
13213
13214 pm_node_unreference(parser, target);
13215
13216 return node;
13217 }
13218 case PM_INSTANCE_VARIABLE_READ_NODE: {
13219 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
13220 return write_node;
13221 }
13222 case PM_MULTI_TARGET_NODE:
13223 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
13224 case PM_SPLAT_NODE: {
13225 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13226
13227 if (splat->expression != NULL) {
13228 splat->expression = parse_write(parser, splat->expression, operator, value);
13229 }
13230
13231 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13232 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
13233
13234 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
13235 }
13236 case PM_CALL_NODE: {
13237 pm_call_node_t *call = (pm_call_node_t *) target;
13238
13239 // If we have no arguments to the call node and we need this to be a
13240 // target then this is either a method call or a local variable
13241 // write.
13242 if (
13243 (call->message_loc.length > 0) &&
13244 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13245 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13246 (call->opening_loc.length == 0) &&
13247 (call->arguments == NULL) &&
13248 (call->block == NULL)
13249 ) {
13250 if (call->receiver == NULL) {
13251 // When we get here, we have a local variable write, because it
13252 // was previously marked as a method call but now we have an =.
13253 // This looks like:
13254 //
13255 // foo = 1
13256 //
13257 // When it was parsed in the prefix position, foo was seen as a
13258 // method call with no receiver and no arguments. Now we have an
13259 // =, so we know it's a local variable write.
13260 pm_location_t message_loc = call->message_loc;
13261
13262 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
13263 pm_parser_local_add_location(parser, &message_loc, 0);
13264
13265 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
13266 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
13267
13268 return target;
13269 }
13270
13271 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13272 // When we get here, we have a method call, because it was
13273 // previously marked as a method call but now we have an =. This
13274 // looks like:
13275 //
13276 // foo.bar = 1
13277 //
13278 // When it was parsed in the prefix position, foo.bar was seen as a
13279 // method call with no arguments. Now we have an =, so we know it's
13280 // a method call with an argument. In this case we will create the
13281 // arguments node, parse the argument, and add it to the list.
13282 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13283 call->arguments = arguments;
13284
13285 pm_arguments_node_arguments_append(parser->arena, arguments, value);
13286 PM_NODE_LENGTH_SET_NODE(call, arguments);
13287 call->equal_loc = TOK2LOC(parser, operator);
13288
13289 parse_write_name(parser, &call->name);
13290 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13291
13292 return UP(call);
13293 }
13294 }
13295
13296 // If there is no call operator and the message is "[]" then this is
13297 // an aref expression, and we can transform it into an aset
13298 // expression.
13299 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13300 if (call->arguments == NULL) {
13301 call->arguments = pm_arguments_node_create(parser);
13302 }
13303
13304 pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
13305 PM_NODE_LENGTH_SET_NODE(target, value);
13306
13307 // Replace the name with "[]=".
13308 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13309 call->equal_loc = TOK2LOC(parser, operator);
13310
13311 // Ensure that the arguments for []= don't contain keywords
13312 pm_index_arguments_check(parser, call->arguments, call->block);
13313 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13314
13315 return target;
13316 }
13317
13318 // If there are arguments on the call node, then it can't be a
13319 // method call ending with = or a local variable write, so it must
13320 // be a syntax error. In this case we'll fall through to our default
13321 // handling. We need to free the value that we parsed because there
13322 // is no way for us to attach it to the tree at this point.
13323 //
13324 // Since it is possible for the value to contain an implicit
13325 // parameter somewhere in its subtree, we need to walk it and remove
13326 // any implicit parameters from the list of implicit parameters for
13327 // the current scope.
13328 pm_node_unreference(parser, value);
13329 }
13331 default:
13332 // In this case we have a node that we don't know how to convert into a
13333 // target. We need to treat it as an error. For now, we'll mark it as an
13334 // error and just skip right past it.
13335 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13336 return target;
13337 }
13338}
13339
13346static pm_node_t *
13347parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13348 switch (PM_NODE_TYPE(target)) {
13349 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13350 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13351 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13352 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13353 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13354 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13355 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13356 default: break;
13357 }
13358
13359 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
13360 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13361
13362 return UP(result);
13363}
13364
13375static pm_node_t *
13376parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13377 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13378
13379 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13380 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13381
13382 while (accept1(parser, PM_TOKEN_COMMA)) {
13383 if (accept1(parser, PM_TOKEN_USTAR)) {
13384 // Here we have a splat operator. It can have a name or be
13385 // anonymous. It can be the final target or be in the middle if
13386 // there haven't been any others yet.
13387 if (has_rest) {
13388 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13389 }
13390
13391 pm_token_t star_operator = parser->previous;
13392 pm_node_t *name = NULL;
13393
13394 if (token_begins_expression_p(parser->current.type)) {
13395 name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13396 name = parse_target(parser, name, true, true);
13397 }
13398
13399 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13400 pm_multi_target_node_targets_append(parser, result, splat);
13401 has_rest = true;
13402 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13403 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13404 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13405 target = parse_target(parser, target, true, false);
13406
13407 pm_multi_target_node_targets_append(parser, result, target);
13408 context_pop(parser);
13409 } else if (token_begins_expression_p(parser->current.type)) {
13410 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13411 target = parse_target(parser, target, true, false);
13412
13413 pm_multi_target_node_targets_append(parser, result, target);
13414 } else if (!match1(parser, PM_TOKEN_EOF)) {
13415 // If we get here, then we have a trailing , in a multi target node.
13416 // We'll add an implicit rest node to represent this.
13417 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13418 pm_multi_target_node_targets_append(parser, result, rest);
13419 break;
13420 }
13421 }
13422
13423 return UP(result);
13424}
13425
13430static pm_node_t *
13431parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13432 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13433
13434 // If we're inside parentheses, then we allow a newline before the
13435 // closing parenthesis or equals sign. Outside of parentheses, a newline
13436 // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
13437 if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
13438 accept1(parser, PM_TOKEN_NEWLINE);
13439 }
13440
13441 // Ensure that we have either an = or a ) after the targets.
13442 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13443 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13444 }
13445
13446 return result;
13447}
13448
13452static pm_statements_node_t *
13453parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13454 // First, skip past any optional terminators that might be at the beginning
13455 // of the statements.
13456 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13457
13458 // If we have a terminator, then we can just return NULL.
13459 if (context_terminator(context, &parser->current)) return NULL;
13460
13461 pm_statements_node_t *statements = pm_statements_node_create(parser);
13462
13463 // At this point we know we have at least one statement, and that it
13464 // immediately follows the current token.
13465 context_push(parser, context);
13466
13467 while (true) {
13468 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13469 pm_statements_node_body_append(parser, statements, node, true);
13470
13471 // If we're recovering from a syntax error, then we need to stop parsing
13472 // the statements now.
13473 if (parser->recovering) {
13474 // If this is the level of context where the recovery has happened,
13475 // then we can mark the parser as done recovering.
13476 if (context_terminator(context, &parser->current)) parser->recovering = false;
13477 break;
13478 }
13479
13480 // If we have a terminator, then we will parse all consecutive
13481 // terminators and then continue parsing the statements list.
13482 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13483 // If we have a terminator, then we will continue parsing the
13484 // statements list.
13485 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13486 if (context_terminator(context, &parser->current)) break;
13487
13488 // Now we can continue parsing the list of statements.
13489 continue;
13490 }
13491
13492 // At this point we have a list of statements that are not terminated by
13493 // a newline or semicolon. At this point we need to check if we're at
13494 // the end of the statements list. If we are, then we should break out
13495 // of the loop.
13496 if (context_terminator(context, &parser->current)) break;
13497
13498 // At this point, we have a syntax error, because the statement was not
13499 // terminated by a newline or semicolon, and we're not at the end of the
13500 // statements list. Ideally we should scan forward to determine if we
13501 // should insert a missing terminator or break out of parsing the
13502 // statements list at this point.
13503 //
13504 // We don't have that yet, so instead we'll do a more naive approach. If
13505 // we were unable to parse an expression, then we will skip past this
13506 // token and continue parsing the statements list. Otherwise we'll add
13507 // an error and continue parsing the statements list.
13508 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
13509 parser_lex(parser);
13510
13511 // If we are at the end of the file, then we need to stop parsing
13512 // the statements entirely at this point. Mark the parser as
13513 // recovering, as we know that EOF closes the top-level context, and
13514 // then break out of the loop.
13515 if (match1(parser, PM_TOKEN_EOF)) {
13516 parser->recovering = true;
13517 break;
13518 }
13519
13520 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13521 if (context_terminator(context, &parser->current)) break;
13522 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13523 // This is an inlined version of accept1 because the error that we
13524 // want to add has varargs. If this happens again, we should
13525 // probably extract a helper function.
13526 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
13527 parser->previous.start = parser->previous.end;
13528 parser->previous.type = 0;
13529 }
13530 }
13531
13532 context_pop(parser);
13533
13534 bool last_value = true;
13535 switch (context) {
13536 case PM_CONTEXT_BEGIN_ENSURE:
13537 case PM_CONTEXT_DEF_ENSURE:
13538 last_value = false;
13539 break;
13540 default:
13541 break;
13542 }
13543 pm_void_statements_check(parser, statements, last_value);
13544
13545 return statements;
13546}
13547
13552static void
13553pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13554 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13555
13556 if (duplicated != NULL) {
13557 pm_buffer_t buffer = { 0 };
13558 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13559
13560 pm_diagnostic_list_append_format(
13561 &parser->metadata_arena,
13562 &parser->warning_list,
13563 duplicated->location.start,
13564 duplicated->location.length,
13565 PM_WARN_DUPLICATED_HASH_KEY,
13566 (int) pm_buffer_length(&buffer),
13567 pm_buffer_value(&buffer),
13568 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13569 );
13570
13571 pm_buffer_cleanup(&buffer);
13572 }
13573}
13574
13579static void
13580pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13581 pm_node_t *previous;
13582
13583 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13584 pm_diagnostic_list_append_format(
13585 &parser->metadata_arena,
13586 &parser->warning_list,
13587 PM_NODE_START(node),
13588 PM_NODE_LENGTH(node),
13589 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13590 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13591 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13592 );
13593 }
13594}
13595
13599static bool
13600parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13601 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13602 bool contains_keyword_splat = false;
13603
13604 while (true) {
13605 pm_node_t *element;
13606
13607 switch (parser->current.type) {
13608 case PM_TOKEN_USTAR_STAR: {
13609 parser_lex(parser);
13610 pm_token_t operator = parser->previous;
13611 pm_node_t *value = NULL;
13612
13613 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13614 // If we're about to parse a nested hash that is being
13615 // pushed into this hash directly with **, then we want the
13616 // inner hash to share the static literals with the outer
13617 // hash.
13618 parser->current_hash_keys = literals;
13619 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13620 } else if (token_begins_expression_p(parser->current.type)) {
13621 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13622 } else {
13623 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13624 }
13625
13626 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13627 contains_keyword_splat = true;
13628 break;
13629 }
13630 case PM_TOKEN_LABEL: {
13631 pm_token_t label = parser->current;
13632 parser_lex(parser);
13633
13634 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13635 pm_hash_key_static_literals_add(parser, literals, key);
13636
13637 pm_node_t *value = NULL;
13638
13639 if (token_begins_expression_p(parser->current.type)) {
13640 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13641 } else {
13642 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13643 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13644 value = UP(pm_constant_read_node_create(parser, &constant));
13645 } else {
13646 int depth = -1;
13647 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13648
13649 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13650 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13651 } else {
13652 depth = pm_parser_local_depth(parser, &identifier);
13653 }
13654
13655 if (depth == -1) {
13656 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13657 } else {
13658 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13659 }
13660 }
13661
13662 value->location.length++;
13663 value = UP(pm_implicit_node_create(parser, value));
13664 }
13665
13666 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13667 break;
13668 }
13669 default: {
13670 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13671
13672 // Hash keys that are strings are automatically frozen. We will
13673 // mark that here.
13674 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13675 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13676 }
13677
13678 pm_hash_key_static_literals_add(parser, literals, key);
13679
13680 pm_token_t operator = { 0 };
13681 if (!pm_symbol_node_label_p(parser, key)) {
13682 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13683 operator = parser->previous;
13684 }
13685
13686 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13687 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13688 break;
13689 }
13690 }
13691
13692 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13693 pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
13694 } else {
13695 pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
13696 }
13697
13698 // If there's no comma after the element, then we're done.
13699 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13700
13701 // If the next element starts with a label or a **, then we know we have
13702 // another element in the hash, so we'll continue parsing.
13703 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13704
13705 // Otherwise we need to check if the subsequent token begins an expression.
13706 // If it does, then we'll continue parsing.
13707 if (token_begins_expression_p(parser->current.type)) continue;
13708
13709 // Otherwise by default we will exit out of this loop.
13710 break;
13711 }
13712
13713 return contains_keyword_splat;
13714}
13715
13716static PRISM_INLINE bool
13717argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13718 if (pm_symbol_node_label_p(parser, argument)) {
13719 return true;
13720 }
13721
13722 switch (PM_NODE_TYPE(argument)) {
13723 case PM_CALL_NODE: {
13724 pm_call_node_t *cast = (pm_call_node_t *) argument;
13725 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13726 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13727 return false;
13728 }
13729 if (cast->block != NULL) {
13730 return false;
13731 }
13732 }
13733 break;
13734 }
13735 default: break;
13736 }
13737 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13738}
13739
13743static PRISM_INLINE void
13744parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13745 if (arguments->arguments == NULL) {
13746 arguments->arguments = pm_arguments_node_create(parser);
13747 }
13748
13749 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
13750}
13751
13755static void
13756parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
13757 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13758
13759 // First we need to check if the next token is one that could be the start
13760 // of an argument. If it's not, then we can just return.
13761 if (
13762 match2(parser, terminator, PM_TOKEN_EOF) ||
13763 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13764 context_terminator(parser->current_context->context, &parser->current)
13765 ) {
13766 return;
13767 }
13768
13769 bool parsed_first_argument = false;
13770 bool parsed_bare_hash = false;
13771 bool parsed_block_argument = false;
13772 bool parsed_forwarding_arguments = false;
13773
13774 while (!match1(parser, PM_TOKEN_EOF)) {
13775 if (parsed_forwarding_arguments) {
13776 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13777 }
13778
13779 pm_node_t *argument = NULL;
13780
13781 switch (parser->current.type) {
13782 case PM_TOKEN_USTAR_STAR:
13783 case PM_TOKEN_LABEL: {
13784 if (parsed_bare_hash) {
13785 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13786 }
13787
13788 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13789 argument = UP(hash);
13790
13791 pm_static_literals_t hash_keys = { 0 };
13792 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13793
13794 parse_arguments_append(parser, arguments, argument);
13795
13796 pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13797 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13798 pm_node_flag_set(UP(arguments->arguments), node_flags);
13799
13800 pm_static_literals_free(&hash_keys);
13801 parsed_bare_hash = true;
13802
13803 break;
13804 }
13805 case PM_TOKEN_UAMPERSAND: {
13806 parser_lex(parser);
13807 pm_token_t operator = parser->previous;
13808 pm_node_t *expression = NULL;
13809
13810 if (token_begins_expression_p(parser->current.type)) {
13811 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13812 } else {
13813 pm_parser_scope_forwarding_block_check(parser, &operator);
13814 }
13815
13816 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
13817 if (parsed_block_argument) {
13818 parse_arguments_append(parser, arguments, argument);
13819 } else {
13820 arguments->block = argument;
13821 }
13822
13823 if (match1(parser, PM_TOKEN_COMMA)) {
13824 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13825 }
13826
13827 parsed_block_argument = true;
13828 break;
13829 }
13830 case PM_TOKEN_USTAR: {
13831 parser_lex(parser);
13832 pm_token_t operator = parser->previous;
13833
13834 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13835 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13836 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13837 if (parsed_bare_hash) {
13838 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13839 }
13840 } else {
13841 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13842
13843 if (parsed_bare_hash) {
13844 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13845 }
13846
13847 argument = UP(pm_splat_node_create(parser, &operator, expression));
13848 }
13849
13850 parse_arguments_append(parser, arguments, argument);
13851 break;
13852 }
13853 case PM_TOKEN_UDOT_DOT_DOT: {
13854 if (accepts_forwarding) {
13855 parser_lex(parser);
13856
13857 if (token_begins_expression_p(parser->current.type)) {
13858 // If the token begins an expression then this ... was
13859 // not actually argument forwarding but was instead a
13860 // range.
13861 pm_token_t operator = parser->previous;
13862 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13863
13864 // If we parse a range, we need to validate that we
13865 // didn't accidentally violate the nonassoc rules of the
13866 // ... operator.
13867 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13868 pm_range_node_t *range = (pm_range_node_t *) right;
13869 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13870 }
13871
13872 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13873 } else {
13874 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13875 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13876 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13877 }
13878
13879 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13880 parse_arguments_append(parser, arguments, argument);
13881 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13882 arguments->has_forwarding = true;
13883 parsed_forwarding_arguments = true;
13884 break;
13885 }
13886 }
13887 }
13889 default: {
13890 if (argument == NULL) {
13891 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13892 }
13893
13894 bool contains_keywords = false;
13895 bool contains_keyword_splat = false;
13896
13897 if (argument_allowed_for_bare_hash(parser, argument)) {
13898 if (parsed_bare_hash) {
13899 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13900 }
13901
13902 pm_token_t operator = { 0 };
13903 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13904 operator = parser->previous;
13905 }
13906
13907 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13908 contains_keywords = true;
13909
13910 // Create the set of static literals for this hash.
13911 pm_static_literals_t hash_keys = { 0 };
13912 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13913
13914 // Finish parsing the one we are part way through.
13915 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13916 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13917
13918 pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
13919 argument = UP(bare_hash);
13920
13921 // Then parse more if we have a comma
13922 if (accept1(parser, PM_TOKEN_COMMA) && (
13923 token_begins_expression_p(parser->current.type) ||
13924 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13925 )) {
13926 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13927 }
13928
13929 pm_static_literals_free(&hash_keys);
13930 parsed_bare_hash = true;
13931 }
13932
13933 parse_arguments_append(parser, arguments, argument);
13934
13935 pm_node_flags_t node_flags = 0;
13936 if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13937 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13938 pm_node_flag_set(UP(arguments->arguments), node_flags);
13939
13940 break;
13941 }
13942 }
13943
13944 parsed_first_argument = true;
13945
13946 // If parsing the argument failed, we need to stop parsing arguments.
13947 if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
13948
13949 // If the terminator of these arguments is not EOF, then we have a
13950 // specific token we're looking for. In that case we can accept a
13951 // newline here because it is not functioning as a statement terminator.
13952 bool accepted_newline = false;
13953 if (terminator != PM_TOKEN_EOF) {
13954 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13955 }
13956
13957 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13958 // If we previously were on a comma and we just parsed a bare hash,
13959 // then we want to continue parsing arguments. This is because the
13960 // comma was grabbed up by the hash parser.
13961 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13962 // If there was a comma, then we need to check if we also accepted a
13963 // newline. If we did, then this is a syntax error.
13964 if (accepted_newline) {
13965 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13966 }
13967
13968 // If this is a command call and an argument takes a block,
13969 // there can be no further arguments. For example,
13970 // `foo(bar 1 do end, 2)` should be rejected.
13971 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13972 pm_call_node_t *call = (pm_call_node_t *) argument;
13973 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13974 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13975 break;
13976 }
13977 }
13978 } else {
13979 // If there is no comma at the end of the argument list then we're
13980 // done parsing arguments and can break out of this loop.
13981 break;
13982 }
13983
13984 // If we hit the terminator, then that means we have a trailing comma so
13985 // we can accept that output as well.
13986 if (match1(parser, terminator)) break;
13987 }
13988}
13989
14001parse_required_destructured_parameter(pm_parser_t *parser) {
14002 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14003
14004 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14005 pm_multi_target_node_opening_set(parser, node, &parser->previous);
14006
14007 do {
14008 pm_node_t *param;
14009
14010 // If we get here then we have a trailing comma, which isn't allowed in
14011 // the grammar. In other places, multi targets _do_ allow trailing
14012 // commas, so here we'll assume this is a mistake of the user not
14013 // knowing it's not allowed here.
14014 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14015 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14016 pm_multi_target_node_targets_append(parser, node, param);
14017 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14018 break;
14019 }
14020
14021 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14022 param = UP(parse_required_destructured_parameter(parser));
14023 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14024 pm_token_t star = parser->previous;
14025 pm_node_t *value = NULL;
14026
14027 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14028 pm_token_t name = parser->previous;
14029 value = UP(pm_required_parameter_node_create(parser, &name));
14030 if (pm_parser_parameter_name_check(parser, &name)) {
14031 pm_node_flag_set_repeated_parameter(value);
14032 }
14033 pm_parser_local_add_token(parser, &name, 1);
14034 }
14035
14036 param = UP(pm_splat_node_create(parser, &star, value));
14037 } else {
14038 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14039 pm_token_t name = parser->previous;
14040
14041 param = UP(pm_required_parameter_node_create(parser, &name));
14042 if (pm_parser_parameter_name_check(parser, &name)) {
14043 pm_node_flag_set_repeated_parameter(param);
14044 }
14045 pm_parser_local_add_token(parser, &name, 1);
14046 }
14047
14048 pm_multi_target_node_targets_append(parser, node, param);
14049 } while (accept1(parser, PM_TOKEN_COMMA));
14050
14051 accept1(parser, PM_TOKEN_NEWLINE);
14052 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14053 pm_multi_target_node_closing_set(parser, node, &parser->previous);
14054
14055 return node;
14056}
14057
/**
 * The states used to validate the order in which parameters appear in a
 * parameter list. The numeric values are significant: update_parameter_state
 * compares them, treating a move to a larger value as an ordering error and
 * lowering the current state when a smaller value is seen.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    /** Any further ordered parameter is reported as an ordering error. */
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,

    /** State mapped from the `**` tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,

    /** State mapped from label tokens. */
    PM_PARAMETERS_ORDER_KEYWORDS = 3,

    /** A state between keywords and the post-optional state. */
    PM_PARAMETERS_ORDER_REST = 4,

    /** State mapped from the `*` tokens, and entered by a named parameter that follows an optional one. */
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,

    /** State mapped from the `=` token. */
    PM_PARAMETERS_ORDER_OPTIONAL = 6,

    /** State mapped from identifiers and opening parentheses. */
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The initial state, before any parameter has been seen. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14073
14077static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14078 [0] = PM_PARAMETERS_NO_CHANGE,
14079 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14080 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14081 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14082 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14083 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14084 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14085 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14086 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14087 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14088 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14089 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14090};
14091
14099static bool
14100update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14101 pm_parameters_order_t state = parameters_ordering[token->type];
14102 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14103
14104 // If we see another ordered argument after a optional argument
14105 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14106 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14107 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14108 return true;
14109 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14110 return true;
14111 }
14112
14113 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14114 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14115 return false;
14116 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14117 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14118 return false;
14119 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14120 // We know what transition we failed on, so we can provide a better error here.
14121 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14122 return false;
14123 }
14124
14125 if (state < *current) *current = state;
14126 return true;
14127}
14128
14129static PRISM_INLINE void
14130parse_parameters_handle_trailing_comma(
14131 pm_parser_t *parser,
14132 pm_parameters_node_t *params,
14133 pm_parameters_order_t order,
14134 bool in_block,
14135 bool allows_trailing_comma
14136) {
14137 if (!allows_trailing_comma) {
14138 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14139 return;
14140 }
14141
14142 if (in_block) {
14143 if (order >= PM_PARAMETERS_ORDER_NAMED) {
14144 // foo do |bar,|; end
14145 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14146
14147 if (params->rest == NULL) {
14148 pm_parameters_node_rest_set(params, param);
14149 } else {
14150 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14151 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14152 }
14153 } else {
14154 // foo do |*bar,|; end
14155 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14156 }
14157 } else {
14158 // https://bugs.ruby-lang.org/issues/19107
14159 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
14160 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14161 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14162 }
14163 }
14164}
14165
14169static pm_parameters_node_t *
14170parse_parameters(
14171 pm_parser_t *parser,
14172 pm_binding_power_t binding_power,
14173 bool uses_parentheses,
14174 bool allows_trailing_comma,
14175 bool allows_forwarding_parameters,
14176 bool accepts_blocks_in_defaults,
14177 bool in_block,
14178 pm_diagnostic_id_t diag_id_forwarding,
14179 uint16_t depth
14180) {
14181 pm_do_loop_stack_push(parser, false);
14182
14183 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14184 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14185
14186 while (true) {
14187 bool parsing = true;
14188
14189 switch (parser->current.type) {
14190 case PM_TOKEN_PARENTHESIS_LEFT: {
14191 update_parameter_state(parser, &parser->current, &order);
14192 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
14193
14194 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14195 pm_parameters_node_requireds_append(parser->arena, params, param);
14196 } else {
14197 pm_parameters_node_posts_append(parser->arena, params, param);
14198 }
14199 break;
14200 }
14201 case PM_TOKEN_UAMPERSAND:
14202 case PM_TOKEN_AMPERSAND: {
14203 update_parameter_state(parser, &parser->current, &order);
14204 parser_lex(parser);
14205
14206 pm_token_t operator = parser->previous;
14207 pm_node_t *param;
14208
14209 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14210 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
14211 } else {
14212 pm_token_t name = {0};
14213
14214 bool repeated = false;
14215 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14216 name = parser->previous;
14217 repeated = pm_parser_parameter_name_check(parser, &name);
14218 pm_parser_local_add_token(parser, &name, 1);
14219 } else {
14220 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14221 }
14222
14223 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
14224 if (repeated) {
14225 pm_node_flag_set_repeated_parameter(param);
14226 }
14227 }
14228
14229 if (params->block == NULL) {
14230 pm_parameters_node_block_set(params, param);
14231 } else {
14232 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
14233 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14234 }
14235
14236 break;
14237 }
14238 case PM_TOKEN_UDOT_DOT_DOT: {
14239 if (!allows_forwarding_parameters) {
14240 pm_parser_err_current(parser, diag_id_forwarding);
14241 }
14242
14243 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14244 parser_lex(parser);
14245
14246 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14247 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14248
14249 if (params->keyword_rest != NULL) {
14250 // If we already have a keyword rest parameter, then we replace it with the
14251 // forwarding parameter and move the keyword rest parameter to the posts list.
14252 pm_node_t *keyword_rest = params->keyword_rest;
14253 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
14254 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14255 params->keyword_rest = NULL;
14256 }
14257
14258 pm_parameters_node_keyword_rest_set(params, UP(param));
14259 break;
14260 }
14261 case PM_TOKEN_CLASS_VARIABLE:
14262 case PM_TOKEN_IDENTIFIER:
14263 case PM_TOKEN_CONSTANT:
14264 case PM_TOKEN_INSTANCE_VARIABLE:
14265 case PM_TOKEN_GLOBAL_VARIABLE:
14266 case PM_TOKEN_METHOD_NAME: {
14267 parser_lex(parser);
14268 switch (parser->previous.type) {
14269 case PM_TOKEN_CONSTANT:
14270 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14271 break;
14272 case PM_TOKEN_INSTANCE_VARIABLE:
14273 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14274 break;
14275 case PM_TOKEN_GLOBAL_VARIABLE:
14276 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14277 break;
14278 case PM_TOKEN_CLASS_VARIABLE:
14279 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14280 break;
14281 case PM_TOKEN_METHOD_NAME:
14282 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14283 break;
14284 default: break;
14285 }
14286
14287 if (parser->current.type == PM_TOKEN_EQUAL) {
14288 update_parameter_state(parser, &parser->current, &order);
14289 } else {
14290 update_parameter_state(parser, &parser->previous, &order);
14291 }
14292
14293 pm_token_t name = parser->previous;
14294 bool repeated = pm_parser_parameter_name_check(parser, &name);
14295 pm_parser_local_add_token(parser, &name, 1);
14296
14297 if (match1(parser, PM_TOKEN_EQUAL)) {
14298 pm_token_t operator = parser->current;
14299 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14300 parser_lex(parser);
14301
14302 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14303 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14304
14305 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14306 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14307 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14308
14309 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14310
14311 if (repeated) {
14312 pm_node_flag_set_repeated_parameter(UP(param));
14313 }
14314 pm_parameters_node_optionals_append(parser->arena, params, param);
14315
14316 // If the value of the parameter increased the number of
14317 // reads of that parameter, then we need to warn that we
14318 // have a circular definition.
14319 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14320 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14321 }
14322
14323 context_pop(parser);
14324
14325 // If parsing the value of the parameter resulted in error recovery,
14326 // then we can put a missing node in its place and stop parsing the
14327 // parameters entirely now.
14328 if (parser->recovering) {
14329 parsing = false;
14330 break;
14331 }
14332 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14333 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14334 if (repeated) {
14335 pm_node_flag_set_repeated_parameter(UP(param));
14336 }
14337 pm_parameters_node_requireds_append(parser->arena, params, UP(param));
14338 } else {
14339 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14340 if (repeated) {
14341 pm_node_flag_set_repeated_parameter(UP(param));
14342 }
14343 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14344 }
14345
14346 break;
14347 }
14348 case PM_TOKEN_LABEL: {
14349 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14350 update_parameter_state(parser, &parser->current, &order);
14351
14352 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14353 parser_lex(parser);
14354
14355 pm_token_t name = parser->previous;
14356 pm_token_t local = name;
14357 local.end -= 1;
14358
14359 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14360 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14361 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14362 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14363 }
14364
14365 bool repeated = pm_parser_parameter_name_check(parser, &local);
14366 pm_parser_local_add_token(parser, &local, 1);
14367
14368 switch (parser->current.type) {
14369 case PM_TOKEN_COMMA:
14370 case PM_TOKEN_PARENTHESIS_RIGHT:
14371 case PM_TOKEN_PIPE: {
14372 context_pop(parser);
14373
14374 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14375 if (repeated) {
14376 pm_node_flag_set_repeated_parameter(param);
14377 }
14378
14379 pm_parameters_node_keywords_append(parser->arena, params, param);
14380 break;
14381 }
14382 case PM_TOKEN_SEMICOLON:
14383 case PM_TOKEN_NEWLINE: {
14384 context_pop(parser);
14385
14386 if (uses_parentheses) {
14387 parsing = false;
14388 break;
14389 }
14390
14391 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14392 if (repeated) {
14393 pm_node_flag_set_repeated_parameter(param);
14394 }
14395
14396 pm_parameters_node_keywords_append(parser->arena, params, param);
14397 break;
14398 }
14399 default: {
14400 pm_node_t *param;
14401
14402 if (token_begins_expression_p(parser->current.type)) {
14403 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14404 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14405
14406 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14407 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14408 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14409
14410 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14411 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14412 }
14413
14414 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14415 }
14416 else {
14417 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14418 }
14419
14420 if (repeated) {
14421 pm_node_flag_set_repeated_parameter(param);
14422 }
14423
14424 context_pop(parser);
14425 pm_parameters_node_keywords_append(parser->arena, params, param);
14426
14427 // If parsing the value of the parameter resulted in error recovery,
14428 // then we can put a missing node in its place and stop parsing the
14429 // parameters entirely now.
14430 if (parser->recovering) {
14431 parsing = false;
14432 break;
14433 }
14434 }
14435 }
14436
14437 parser->in_keyword_arg = false;
14438 break;
14439 }
14440 case PM_TOKEN_USTAR:
14441 case PM_TOKEN_STAR: {
14442 update_parameter_state(parser, &parser->current, &order);
14443 parser_lex(parser);
14444
14445 pm_token_t operator = parser->previous;
14446 pm_token_t name = { 0 };
14447 bool repeated = false;
14448
14449 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14450 name = parser->previous;
14451 repeated = pm_parser_parameter_name_check(parser, &name);
14452 pm_parser_local_add_token(parser, &name, 1);
14453 } else {
14454 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14455 }
14456
14457 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14458 if (repeated) {
14459 pm_node_flag_set_repeated_parameter(param);
14460 }
14461
14462 if (params->rest == NULL) {
14463 pm_parameters_node_rest_set(params, param);
14464 } else {
14465 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14466 pm_parameters_node_posts_append(parser->arena, params, param);
14467 }
14468
14469 break;
14470 }
14471 case PM_TOKEN_STAR_STAR:
14472 case PM_TOKEN_USTAR_STAR: {
14473 pm_parameters_order_t previous_order = order;
14474 update_parameter_state(parser, &parser->current, &order);
14475 parser_lex(parser);
14476
14477 pm_token_t operator = parser->previous;
14478 pm_node_t *param;
14479
14480 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14481 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14482 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14483 }
14484
14485 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14486 } else {
14487 pm_token_t name = { 0 };
14488
14489 bool repeated = false;
14490 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14491 name = parser->previous;
14492 repeated = pm_parser_parameter_name_check(parser, &name);
14493 pm_parser_local_add_token(parser, &name, 1);
14494 } else {
14495 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14496 }
14497
14498 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14499 if (repeated) {
14500 pm_node_flag_set_repeated_parameter(param);
14501 }
14502 }
14503
14504 if (params->keyword_rest == NULL) {
14505 pm_parameters_node_keyword_rest_set(params, param);
14506 } else {
14507 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14508 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14509 }
14510
14511 break;
14512 }
14513 default:
14514 if (parser->previous.type == PM_TOKEN_COMMA) {
14515 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14516 }
14517
14518 parsing = false;
14519 break;
14520 }
14521
14522 // If we hit some kind of issue while parsing the parameter, this would
14523 // have been set to false. In that case, we need to break out of the
14524 // loop.
14525 if (!parsing) break;
14526
14527 bool accepted_newline = false;
14528 if (uses_parentheses) {
14529 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14530 }
14531
14532 if (accept1(parser, PM_TOKEN_COMMA)) {
14533 // If there was a comma, but we also accepted a newline, then this
14534 // is a syntax error.
14535 if (accepted_newline) {
14536 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14537 }
14538 } else {
14539 // If there was no comma, then we're done parsing parameters.
14540 break;
14541 }
14542 }
14543
14544 pm_do_loop_stack_pop(parser);
14545
14546 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14547 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14548 return NULL;
14549 }
14550
14551 return params;
14552}
14553
14558static size_t
14559token_newline_index(const pm_parser_t *parser) {
14560 if (parser->heredoc_end == NULL) {
14561 // This is the common case. In this case we can look at the previously
14562 // recorded newline in the newline list and subtract from the current
14563 // offset.
14564 return parser->line_offsets.size - 1;
14565 } else {
14566 // This is unlikely. This is the case that we have already parsed the
14567 // start of a heredoc, so we cannot rely on looking at the previous
14568 // offset of the newline list, and instead must go through the whole
14569 // process of a binary search for the line number.
14570 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14571 }
14572}
14573
14578static int64_t
14579token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14580 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14581 const uint8_t *end = token->start;
14582
14583 // Skip over the BOM if it is present.
14584 if (
14585 newline_index == 0 &&
14586 parser->start[0] == 0xef &&
14587 parser->start[1] == 0xbb &&
14588 parser->start[2] == 0xbf
14589 ) cursor += 3;
14590
14591 int64_t column = 0;
14592 for (; cursor < end; cursor++) {
14593 switch (*cursor) {
14594 case '\t':
14595 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14596 break;
14597 case ' ':
14598 column++;
14599 break;
14600 default:
14601 column++;
14602 if (break_on_non_space) return -1;
14603 break;
14604 }
14605 }
14606
14607 return column;
14608}
14609
14614static void
14615parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14616 // If these warnings are disabled (unlikely), then we can just return.
14617 if (!parser->warn_mismatched_indentation) return;
14618
14619 // If the tokens are on the same line, we do not warn.
14620 size_t closing_newline_index = token_newline_index(parser);
14621 if (opening_newline_index == closing_newline_index) return;
14622
14623 // If the opening token has anything other than spaces or tabs before it,
14624 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14625 // and the `if` immediately follows an `else` keyword.
14626 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14627 if (!if_after_else && (opening_column == -1)) return;
14628
14629 // Get a reference to the closing token off the current parser. This assumes
14630 // that the caller has placed this in the correct position.
14631 pm_token_t *closing_token = &parser->current;
14632
14633 // If the tokens are at the same indentation, we do not warn.
14634 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14635 if ((closing_column == -1) || (opening_column == closing_column)) return;
14636
14637 // If the closing column is greater than the opening column and we are
14638 // allowing indentation, then we do not warn.
14639 if (allow_indent && (closing_column > opening_column)) return;
14640
14641 // Otherwise, add a warning.
14642 PM_PARSER_WARN_FORMAT(
14643 parser,
14644 PM_TOKEN_START(parser, closing_token),
14645 PM_TOKEN_LENGTH(closing_token),
14646 PM_WARN_INDENTATION_MISMATCH,
14647 (int) (closing_token->end - closing_token->start),
14648 (const char *) closing_token->start,
14649 (int) (opening_token->end - opening_token->start),
14650 (const char *) opening_token->start,
14651 ((int32_t) opening_newline_index) + parser->start_line
14652 );
14653}
14654
/**
 * The kind of construct whose rescue clauses are being parsed. Values start
 * at 1, so 0 is never a valid type.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14664
14669static PRISM_INLINE void
14670parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14671 pm_rescue_node_t *current = NULL;
14672
14673 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14674 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14675 parser_lex(parser);
14676
14677 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14678
14679 switch (parser->current.type) {
14680 case PM_TOKEN_EQUAL_GREATER: {
14681 // Here we have an immediate => after the rescue keyword, in which case
14682 // we're going to have an empty list of exceptions to rescue (which
14683 // implies StandardError).
14684 parser_lex(parser);
14685 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14686
14687 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14688 reference = parse_target(parser, reference, false, false);
14689
14690 pm_rescue_node_reference_set(rescue, reference);
14691 break;
14692 }
14693 case PM_TOKEN_NEWLINE:
14694 case PM_TOKEN_SEMICOLON:
14695 case PM_TOKEN_KEYWORD_THEN:
14696 // Here we have a terminator for the rescue keyword, in which
14697 // case we're going to just continue on.
14698 break;
14699 default: {
14700 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14701 // Here we have something that could be an exception expression, so
14702 // we'll attempt to parse it here and any others delimited by commas.
14703
14704 do {
14705 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14706 pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
14707
14708 // If we hit a newline, then this is the end of the rescue expression. We
14709 // can continue on to parse the statements.
14710 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14711
14712 // If we hit a `=>` then we're going to parse the exception variable. Once
14713 // we've done that, we'll break out of the loop and parse the statements.
14714 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14715 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14716
14717 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14718 reference = parse_target(parser, reference, false, false);
14719
14720 pm_rescue_node_reference_set(rescue, reference);
14721 break;
14722 }
14723 } while (accept1(parser, PM_TOKEN_COMMA));
14724 }
14725 }
14726 }
14727
14728 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14729 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14730 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14731 }
14732 } else {
14733 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14734 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14735 }
14736
14737 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14738 pm_accepts_block_stack_push(parser, true);
14739 pm_context_t context;
14740
14741 switch (type) {
14742 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14743 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14744 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14745 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14746 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14747 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14748 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14749 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14750 }
14751
14752 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14753 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14754
14755 pm_accepts_block_stack_pop(parser);
14756 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14757 }
14758
14759 if (current == NULL) {
14760 pm_begin_node_rescue_clause_set(parent_node, rescue);
14761 } else {
14762 pm_rescue_node_subsequent_set(current, rescue);
14763 }
14764
14765 current = rescue;
14766 }
14767
14768 // The end node locations on rescue nodes will not be set correctly
14769 // since we won't know the end until we've found all subsequent
14770 // clauses. This sets the end location on all rescues once we know it.
14771 if (current != NULL) {
14772 pm_rescue_node_t *clause = parent_node->rescue_clause;
14773
14774 while (clause != NULL) {
14775 PM_NODE_LENGTH_SET_NODE(clause, current);
14776 clause = clause->subsequent;
14777 }
14778 }
14779
14780 pm_token_t else_keyword;
14781 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14782 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14783 opening_newline_index = token_newline_index(parser);
14784
14785 else_keyword = parser->current;
14786 opening = &else_keyword;
14787
14788 parser_lex(parser);
14789 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14790
14791 pm_statements_node_t *else_statements = NULL;
14792 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14793 pm_accepts_block_stack_push(parser, true);
14794 pm_context_t context;
14795
14796 switch (type) {
14797 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14798 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14799 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14800 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14801 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14802 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14803 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14804 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14805 }
14806
14807 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14808 pm_accepts_block_stack_pop(parser);
14809
14810 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14811 }
14812
14813 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14814 pm_begin_node_else_clause_set(parent_node, else_clause);
14815
14816 // If we don't have a `current` rescue node, then this is a dangling
14817 // else, and it's an error.
14818 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14819 }
14820
14821 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14822 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14823 pm_token_t ensure_keyword = parser->current;
14824
14825 parser_lex(parser);
14826 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14827
14828 pm_statements_node_t *ensure_statements = NULL;
14829 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14830 pm_accepts_block_stack_push(parser, true);
14831 pm_context_t context;
14832
14833 switch (type) {
14834 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14835 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14836 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14837 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14838 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14839 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14840 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14841 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14842 }
14843
14844 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14845 pm_accepts_block_stack_pop(parser);
14846
14847 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14848 }
14849
14850 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14851 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14852 }
14853
14854 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14855 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14856 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14857 } else {
14858 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14859 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14860 }
14861}
14862
14867static pm_begin_node_t *
14868parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14869 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14870 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14871
14872 node->base.location.start = U32(start - parser->start);
14873 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14874
14875 return node;
14876}
14877
14882parse_block_parameters(
14883 pm_parser_t *parser,
14884 bool allows_trailing_comma,
14885 const pm_token_t *opening,
14886 bool is_lambda_literal,
14887 bool accepts_blocks_in_defaults,
14888 uint16_t depth
14889) {
14890 pm_parameters_node_t *parameters = NULL;
14891 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14892 if (!is_lambda_literal) {
14893 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14894 }
14895 parameters = parse_parameters(
14896 parser,
14897 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14898 false,
14899 allows_trailing_comma,
14900 false,
14901 accepts_blocks_in_defaults,
14902 true,
14903 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14904 (uint16_t) (depth + 1)
14905 );
14906 if (!is_lambda_literal) {
14907 context_pop(parser);
14908 }
14909 }
14910
14911 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14912 if (opening != NULL) {
14913 accept1(parser, PM_TOKEN_NEWLINE);
14914
14915 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14916 do {
14917 switch (parser->current.type) {
14918 case PM_TOKEN_CONSTANT:
14919 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14920 parser_lex(parser);
14921 break;
14922 case PM_TOKEN_INSTANCE_VARIABLE:
14923 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14924 parser_lex(parser);
14925 break;
14926 case PM_TOKEN_GLOBAL_VARIABLE:
14927 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14928 parser_lex(parser);
14929 break;
14930 case PM_TOKEN_CLASS_VARIABLE:
14931 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14932 parser_lex(parser);
14933 break;
14934 default:
14935 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14936 break;
14937 }
14938
14939 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14940 pm_parser_local_add_token(parser, &parser->previous, 1);
14941
14942 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14943 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14944
14945 pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
14946 } while (accept1(parser, PM_TOKEN_COMMA));
14947 }
14948 }
14949
14950 return block_parameters;
14951}
14952
14957static bool
14958outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14959 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14960 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14961 }
14962
14963 return false;
14964}
14965
/**
 * The names of the numbered parameters `_1` through `_9`. The entry at index
 * i holds the name of numbered parameter i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
14974
14980static pm_node_t *
14981parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14982 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14983
14984 // If we have ordinary parameters, then we will return them as the set of
14985 // parameters.
14986 if (parameters != NULL) {
14987 // If we also have implicit parameters, then this is an error.
14988 if (implicit_parameters->size > 0) {
14989 pm_node_t *node = implicit_parameters->nodes[0];
14990
14991 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14992 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14993 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14994 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14995 } else {
14996 assert(false && "unreachable");
14997 }
14998 }
14999
15000 return parameters;
15001 }
15002
15003 // If we don't have any implicit parameters, then the set of parameters is
15004 // NULL.
15005 if (implicit_parameters->size == 0) {
15006 return NULL;
15007 }
15008
15009 // If we don't have ordinary parameters, then we now must validate our set
15010 // of implicit parameters. We can only have numbered parameters or it, but
15011 // they cannot be mixed.
15012 uint8_t numbered_parameter = 0;
15013 bool it_parameter = false;
15014
15015 for (size_t index = 0; index < implicit_parameters->size; index++) {
15016 pm_node_t *node = implicit_parameters->nodes[index];
15017
15018 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15019 if (it_parameter) {
15020 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15021 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15022 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15023 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15024 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15025 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
15026 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
15027 } else {
15028 assert(false && "unreachable");
15029 }
15030 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15031 if (numbered_parameter > 0) {
15032 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15033 } else {
15034 it_parameter = true;
15035 }
15036 }
15037 }
15038
15039 if (numbered_parameter > 0) {
15040 // Go through the parent scopes and mark them as being disallowed from
15041 // using numbered parameters because this inner scope is using them.
15042 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15043 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15044 }
15045 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
15046 }
15047
15048 if (it_parameter) {
15049 return UP(pm_it_parameters_node_create(parser, opening, closing));
15050 }
15051
15052 return NULL;
15053}
15054
15058static pm_block_node_t *
15059parse_block(pm_parser_t *parser, uint16_t depth) {
15060 pm_token_t opening = parser->previous;
15061 accept1(parser, PM_TOKEN_NEWLINE);
15062
15063 pm_accepts_block_stack_push(parser, true);
15064 pm_parser_scope_push(parser, false);
15065
15066 pm_block_parameters_node_t *block_parameters = NULL;
15067
15068 if (accept1(parser, PM_TOKEN_PIPE)) {
15069 pm_token_t block_parameters_opening = parser->previous;
15070 if (match1(parser, PM_TOKEN_PIPE)) {
15071 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15072 parser->command_start = true;
15073 parser_lex(parser);
15074 } else {
15075 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15076 accept1(parser, PM_TOKEN_NEWLINE);
15077 parser->command_start = true;
15078 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15079 }
15080
15081 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
15082 }
15083
15084 accept1(parser, PM_TOKEN_NEWLINE);
15085 pm_node_t *statements = NULL;
15086
15087 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15088 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15089 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
15090 }
15091
15092 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
15093 } else {
15094 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15095 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15096 pm_accepts_block_stack_push(parser, true);
15097 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
15098 pm_accepts_block_stack_pop(parser);
15099 }
15100
15101 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15102 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15103 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
15104 }
15105 }
15106
15107 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
15108 }
15109
15110 pm_constant_id_list_t locals;
15111 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15112 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
15113
15114 pm_parser_scope_pop(parser);
15115 pm_accepts_block_stack_pop(parser);
15116
15117 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15118}
15119
15125static bool
15126parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
15127 /* Fast path: if the current token can't begin an expression and isn't
15128 * a parenthesis, block opener, or splat/block-pass operator, there are
15129 * no arguments to parse. */
15130 if (
15131 !token_begins_expression_p(parser->current.type) &&
15132 !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
15133 ) {
15134 return false;
15135 }
15136
15137 bool found = false;
15138 bool parsed_command_args = false;
15139
15140 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15141 found |= true;
15142 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
15143
15144 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15145 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15146 } else {
15147 pm_accepts_block_stack_push(parser, true);
15148 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
15149
15150 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15151 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
15152 parser->previous.start = parser->previous.end;
15153 parser->previous.type = 0;
15154 }
15155
15156 pm_accepts_block_stack_pop(parser);
15157 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15158 }
15159 } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15160 found |= true;
15161 parsed_command_args = true;
15162 pm_accepts_block_stack_push(parser, false);
15163
15164 // If we get here, then the subsequent token cannot be used as an infix
15165 // operator. In this case we assume the subsequent token is part of an
15166 // argument to this method call.
15167 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
15168
15169 // If we have done with the arguments and still not consumed the comma,
15170 // then we have a trailing comma where we need to check whether it is
15171 // allowed or not.
15172 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15173 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
15174 }
15175
15176 pm_accepts_block_stack_pop(parser);
15177 }
15178
15179 // If we're at the end of the arguments, we can now check if there is a block
15180 // node that starts with a {. If there is, then we can parse it and add it to
15181 // the arguments.
15182 if (accepts_block) {
15183 pm_block_node_t *block = NULL;
15184
15185 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15186 found |= true;
15187 block = parse_block(parser, (uint16_t) (depth + 1));
15188 pm_arguments_validate_block(parser, arguments, block);
15189 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15190 found |= true;
15191 block = parse_block(parser, (uint16_t) (depth + 1));
15192 } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
15193 found |= true;
15194 block = parse_block(parser, (uint16_t) (depth + 1));
15195 }
15196
15197 if (block != NULL) {
15198 if (arguments->block == NULL && !arguments->has_forwarding) {
15199 arguments->block = UP(block);
15200 } else {
15201 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
15202
15203 if (arguments->block != NULL) {
15204 if (arguments->arguments == NULL) {
15205 arguments->arguments = pm_arguments_node_create(parser);
15206 }
15207 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
15208 }
15209 arguments->block = UP(block);
15210 }
15211 }
15212 }
15213
15214 return found;
15215}
15216
15221static void
15222parse_return(pm_parser_t *parser, pm_node_t *node) {
15223 bool in_sclass = false;
15224 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15225 switch (context_node->context) {
15226 case PM_CONTEXT_BEGIN_ELSE:
15227 case PM_CONTEXT_BEGIN_ENSURE:
15228 case PM_CONTEXT_BEGIN_RESCUE:
15229 case PM_CONTEXT_BEGIN:
15230 case PM_CONTEXT_CASE_IN:
15231 case PM_CONTEXT_CASE_WHEN:
15232 case PM_CONTEXT_DEFAULT_PARAMS:
15233 case PM_CONTEXT_DEFINED:
15234 case PM_CONTEXT_ELSE:
15235 case PM_CONTEXT_ELSIF:
15236 case PM_CONTEXT_EMBEXPR:
15237 case PM_CONTEXT_FOR_INDEX:
15238 case PM_CONTEXT_FOR:
15239 case PM_CONTEXT_IF:
15240 case PM_CONTEXT_LOOP_PREDICATE:
15241 case PM_CONTEXT_MAIN:
15242 case PM_CONTEXT_MULTI_TARGET:
15243 case PM_CONTEXT_PARENS:
15244 case PM_CONTEXT_POSTEXE:
15245 case PM_CONTEXT_PREDICATE:
15246 case PM_CONTEXT_PREEXE:
15247 case PM_CONTEXT_RESCUE_MODIFIER:
15248 case PM_CONTEXT_TERNARY:
15249 case PM_CONTEXT_UNLESS:
15250 case PM_CONTEXT_UNTIL:
15251 case PM_CONTEXT_WHILE:
15252 // Keep iterating up the lists of contexts, because returns can
15253 // see through these.
15254 continue;
15255 case PM_CONTEXT_SCLASS_ELSE:
15256 case PM_CONTEXT_SCLASS_ENSURE:
15257 case PM_CONTEXT_SCLASS_RESCUE:
15258 case PM_CONTEXT_SCLASS:
15259 in_sclass = true;
15260 continue;
15261 case PM_CONTEXT_CLASS_ELSE:
15262 case PM_CONTEXT_CLASS_ENSURE:
15263 case PM_CONTEXT_CLASS_RESCUE:
15264 case PM_CONTEXT_CLASS:
15265 case PM_CONTEXT_MODULE_ELSE:
15266 case PM_CONTEXT_MODULE_ENSURE:
15267 case PM_CONTEXT_MODULE_RESCUE:
15268 case PM_CONTEXT_MODULE:
15269 // These contexts are invalid for a return.
15270 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15271 return;
15272 case PM_CONTEXT_BLOCK_BRACES:
15273 case PM_CONTEXT_BLOCK_ELSE:
15274 case PM_CONTEXT_BLOCK_ENSURE:
15275 case PM_CONTEXT_BLOCK_KEYWORDS:
15276 case PM_CONTEXT_BLOCK_RESCUE:
15277 case PM_CONTEXT_BLOCK_PARAMETERS:
15278 case PM_CONTEXT_DEF_ELSE:
15279 case PM_CONTEXT_DEF_ENSURE:
15280 case PM_CONTEXT_DEF_PARAMS:
15281 case PM_CONTEXT_DEF_RESCUE:
15282 case PM_CONTEXT_DEF:
15283 case PM_CONTEXT_LAMBDA_BRACES:
15284 case PM_CONTEXT_LAMBDA_DO_END:
15285 case PM_CONTEXT_LAMBDA_ELSE:
15286 case PM_CONTEXT_LAMBDA_ENSURE:
15287 case PM_CONTEXT_LAMBDA_RESCUE:
15288 // These contexts are valid for a return, and we should not
15289 // continue to loop.
15290 return;
15291 case PM_CONTEXT_NONE:
15292 // This case should never happen.
15293 assert(false && "unreachable");
15294 break;
15295 }
15296 }
15297 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
15298 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15299 }
15300}
15301
15306static void
15307parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15308 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15309 switch (context_node->context) {
15310 case PM_CONTEXT_BLOCK_BRACES:
15311 case PM_CONTEXT_BLOCK_KEYWORDS:
15312 case PM_CONTEXT_BLOCK_ELSE:
15313 case PM_CONTEXT_BLOCK_ENSURE:
15314 case PM_CONTEXT_BLOCK_PARAMETERS:
15315 case PM_CONTEXT_BLOCK_RESCUE:
15316 case PM_CONTEXT_DEFINED:
15317 case PM_CONTEXT_FOR:
15318 case PM_CONTEXT_LAMBDA_BRACES:
15319 case PM_CONTEXT_LAMBDA_DO_END:
15320 case PM_CONTEXT_LAMBDA_ELSE:
15321 case PM_CONTEXT_LAMBDA_ENSURE:
15322 case PM_CONTEXT_LAMBDA_RESCUE:
15323 case PM_CONTEXT_LOOP_PREDICATE:
15324 case PM_CONTEXT_UNTIL:
15325 case PM_CONTEXT_WHILE:
15326 // These are the good cases. We're allowed to have a block exit
15327 // in these contexts.
15328 return;
15329 case PM_CONTEXT_POSTEXE:
15330 // https://bugs.ruby-lang.org/issues/20409
15331 if (context_node->context == PM_CONTEXT_POSTEXE) {
15332 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
15333 return;
15334 }
15335 }
15337 case PM_CONTEXT_DEF:
15338 case PM_CONTEXT_DEF_PARAMS:
15339 case PM_CONTEXT_DEF_ELSE:
15340 case PM_CONTEXT_DEF_ENSURE:
15341 case PM_CONTEXT_DEF_RESCUE:
15342 case PM_CONTEXT_MAIN:
15343 case PM_CONTEXT_PREEXE:
15344 case PM_CONTEXT_SCLASS:
15345 case PM_CONTEXT_SCLASS_ELSE:
15346 case PM_CONTEXT_SCLASS_ENSURE:
15347 case PM_CONTEXT_SCLASS_RESCUE:
15348 // These are the bad cases. We're not allowed to have a block
15349 // exit in these contexts.
15350 //
15351 // If we get here, then we're about to mark this block exit
15352 // as invalid. However, it could later _become_ valid if we
15353 // find a trailing while/until on the expression. In this
15354 // case instead of adding the error here, we'll add the
15355 // block exit to the list of exits for the expression, and
15356 // the node parsing will handle validating it instead.
15357 assert(parser->current_block_exits != NULL);
15358 pm_node_list_append(parser->arena, parser->current_block_exits, node);
15359 return;
15360 case PM_CONTEXT_BEGIN_ELSE:
15361 case PM_CONTEXT_BEGIN_ENSURE:
15362 case PM_CONTEXT_BEGIN_RESCUE:
15363 case PM_CONTEXT_BEGIN:
15364 case PM_CONTEXT_CASE_IN:
15365 case PM_CONTEXT_CASE_WHEN:
15366 case PM_CONTEXT_CLASS_ELSE:
15367 case PM_CONTEXT_CLASS_ENSURE:
15368 case PM_CONTEXT_CLASS_RESCUE:
15369 case PM_CONTEXT_CLASS:
15370 case PM_CONTEXT_DEFAULT_PARAMS:
15371 case PM_CONTEXT_ELSE:
15372 case PM_CONTEXT_ELSIF:
15373 case PM_CONTEXT_EMBEXPR:
15374 case PM_CONTEXT_FOR_INDEX:
15375 case PM_CONTEXT_IF:
15376 case PM_CONTEXT_MODULE_ELSE:
15377 case PM_CONTEXT_MODULE_ENSURE:
15378 case PM_CONTEXT_MODULE_RESCUE:
15379 case PM_CONTEXT_MODULE:
15380 case PM_CONTEXT_MULTI_TARGET:
15381 case PM_CONTEXT_PARENS:
15382 case PM_CONTEXT_PREDICATE:
15383 case PM_CONTEXT_RESCUE_MODIFIER:
15384 case PM_CONTEXT_TERNARY:
15385 case PM_CONTEXT_UNLESS:
15386 // In these contexts we should continue walking up the list of
15387 // contexts.
15388 break;
15389 case PM_CONTEXT_NONE:
15390 // This case should never happen.
15391 assert(false && "unreachable");
15392 break;
15393 }
15394 }
15395}
15396
15401static pm_node_list_t *
15402push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15403 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15404 parser->current_block_exits = current_block_exits;
15405 return previous_block_exits;
15406}
15407
15413static void
15414flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15415 pm_node_t *block_exit;
15416 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15417 const char *type;
15418
15419 switch (PM_NODE_TYPE(block_exit)) {
15420 case PM_BREAK_NODE: type = "break"; break;
15421 case PM_NEXT_NODE: type = "next"; break;
15422 case PM_REDO_NODE: type = "redo"; break;
15423 default: assert(false && "unreachable"); type = ""; break;
15424 }
15425
15426 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15427 }
15428
15429 parser->current_block_exits = previous_block_exits;
15430}
15431
15436static void
15437pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15438 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15439 // If we matched a trailing while/until, then all of the block exits in
15440 // the contained list are valid. In this case we do not need to do
15441 // anything.
15442 parser->current_block_exits = previous_block_exits;
15443 } else if (previous_block_exits != NULL) {
15444 // If we did not matching a trailing while/until, then all of the block
15445 // exits contained in the list are invalid for this specific context.
15446 // However, they could still become valid in a higher level context if
15447 // there is another list above this one. In this case we'll push all of
15448 // the block exits up to the previous list.
15449 pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
15450 parser->current_block_exits = previous_block_exits;
15451 } else {
15452 // If we did not match a trailing while/until and this was the last
15453 // chance to do so, then all of the block exits in the list are invalid
15454 // and we need to add an error for each of them.
15455 flush_block_exits(parser, previous_block_exits);
15456 }
15457}
15458
15459static PRISM_INLINE pm_node_t *
15460parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15461 context_push(parser, PM_CONTEXT_PREDICATE);
15462 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15463 pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
15464
15465 // Predicates are closed by a term, a "then", or a term and then a "then".
15466 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15467
15468 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15469 predicate_closed = true;
15470 *then_keyword = parser->previous;
15471 }
15472
15473 if (!predicate_closed) {
15474 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15475 }
15476
15477 context_pop(parser);
15478 return predicate;
15479}
15480
15481static PRISM_INLINE pm_node_t *
15482parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15483 pm_node_list_t current_block_exits = { 0 };
15484 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15485
15486 pm_token_t keyword = parser->previous;
15487 pm_token_t then_keyword = { 0 };
15488
15489 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15490 pm_statements_node_t *statements = NULL;
15491
15492 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15493 pm_accepts_block_stack_push(parser, true);
15494 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15495 pm_accepts_block_stack_pop(parser);
15496 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15497 }
15498
15499 pm_node_t *parent = NULL;
15500
15501 switch (context) {
15502 case PM_CONTEXT_IF:
15503 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15504 break;
15505 case PM_CONTEXT_UNLESS:
15506 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15507 break;
15508 default:
15509 assert(false && "unreachable");
15510 break;
15511 }
15512
15513 pm_node_t *current = parent;
15514
15515 // Parse any number of elsif clauses. This will form a linked list of if
15516 // nodes pointing to each other from the top.
15517 if (context == PM_CONTEXT_IF) {
15518 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15519 if (parser_end_of_line_p(parser)) {
15520 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15521 }
15522
15523 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15524 pm_token_t elsif_keyword = parser->current;
15525 parser_lex(parser);
15526
15527 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15528 pm_accepts_block_stack_push(parser, true);
15529
15530 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15531 pm_accepts_block_stack_pop(parser);
15532 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15533
15534 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15535 ((pm_if_node_t *) current)->subsequent = elsif;
15536 current = elsif;
15537 }
15538 }
15539
15540 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15541 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15542 opening_newline_index = token_newline_index(parser);
15543
15544 parser_lex(parser);
15545 pm_token_t else_keyword = parser->previous;
15546
15547 pm_accepts_block_stack_push(parser, true);
15548 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15549 pm_accepts_block_stack_pop(parser);
15550
15551 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15552 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15553 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15554
15555 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15556
15557 switch (context) {
15558 case PM_CONTEXT_IF:
15559 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15560 break;
15561 case PM_CONTEXT_UNLESS:
15562 ((pm_unless_node_t *) parent)->else_clause = else_node;
15563 break;
15564 default:
15565 assert(false && "unreachable");
15566 break;
15567 }
15568 } else {
15569 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15570 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15571 }
15572
15573 // Set the appropriate end location for all of the nodes in the subtree.
15574 switch (context) {
15575 case PM_CONTEXT_IF: {
15576 pm_node_t *current = parent;
15577 bool recursing = true;
15578
15579 while (recursing) {
15580 switch (PM_NODE_TYPE(current)) {
15581 case PM_IF_NODE:
15582 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15583 current = ((pm_if_node_t *) current)->subsequent;
15584 recursing = current != NULL;
15585 break;
15586 case PM_ELSE_NODE:
15587 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15588 recursing = false;
15589 break;
15590 default: {
15591 recursing = false;
15592 break;
15593 }
15594 }
15595 }
15596 break;
15597 }
15598 case PM_CONTEXT_UNLESS:
15599 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15600 break;
15601 default:
15602 assert(false && "unreachable");
15603 break;
15604 }
15605
15606 pop_block_exits(parser, previous_block_exits);
15607 return parent;
15608}
15609
/**
 * Case-label list of keyword tokens. The leading `case` and the trailing `:`
 * are supplied at the use site, as in `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_BLOCK: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15625
/**
 * Case-label list of operator tokens. The leading `case` and the trailing `:`
 * are supplied at the use site, as in `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15638
/**
 * Case-label list of the tokens grouped here as "primitive": numeric
 * literals, the opening tokens of string-like and list literals, a few
 * literal keywords, the lambda arrow, heredoc starts, and character literals.
 * The leading `case` and the trailing `:` are supplied at the use site, as in
 * `case PM_CASE_PRIMITIVE:`.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15653
/**
 * Case-label list of parameter-related tokens. The leading `case` and the
 * trailing `:` are supplied at the use site, as in `case PM_CASE_PARAMETER:`.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15662
/**
 * Case-label list of the node types grouped here as "writable". The leading
 * `case` and the trailing `:` are supplied at the use site, as in
 * `case PM_CASE_WRITABLE:`.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15671
15672// Assert here that the flags are the same so that we can safely switch the type
15673// of the node without having to move the flags.
15674PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15675
15680static PRISM_INLINE pm_node_flags_t
15681parse_unescaped_encoding(const pm_parser_t *parser) {
15682 if (parser->explicit_encoding != NULL) {
15683 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15684 // If the there's an explicit encoding and it's using a UTF-8 escape
15685 // sequence, then mark the string as UTF-8.
15686 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15687 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15688 // If there's a non-UTF-8 escape sequence being used, then the
15689 // string uses the source encoding, unless the source is marked as
15690 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15691 // order to keep the string valid.
15692 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15693 }
15694 }
15695 return 0;
15696}
15697
15702static pm_node_t *
15703parse_string_part(pm_parser_t *parser, uint16_t depth) {
15704 switch (parser->current.type) {
15705 // Here the lexer has returned to us plain string content. In this case
15706 // we'll create a string node that has no opening or closing and return that
15707 // as the part. These kinds of parts look like:
15708 //
15709 // "aaa #{bbb} #@ccc ddd"
15710 // ^^^^ ^ ^^^^
15711 case PM_TOKEN_STRING_CONTENT: {
15712 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15713 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15714
15715 parser_lex(parser);
15716 return node;
15717 }
15718 // Here the lexer has returned the beginning of an embedded expression. In
15719 // that case we'll parse the inner statements and return that as the part.
15720 // These kinds of parts look like:
15721 //
15722 // "aaa #{bbb} #@ccc ddd"
15723 // ^^^^^^
15724 case PM_TOKEN_EMBEXPR_BEGIN: {
15725 // Ruby disallows seeing encoding around interpolation in strings,
15726 // even though it is known at parse time.
15727 parser->explicit_encoding = NULL;
15728
15729 pm_lex_state_t state = parser->lex_state;
15730 int brace_nesting = parser->brace_nesting;
15731
15732 parser->brace_nesting = 0;
15733 lex_state_set(parser, PM_LEX_STATE_BEG);
15734 parser_lex(parser);
15735
15736 pm_token_t opening = parser->previous;
15737 pm_statements_node_t *statements = NULL;
15738
15739 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15740 pm_accepts_block_stack_push(parser, true);
15741 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15742 pm_accepts_block_stack_pop(parser);
15743 }
15744
15745 parser->brace_nesting = brace_nesting;
15746 lex_state_set(parser, state);
15747 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15748
15749 // If this set of embedded statements only contains a single
15750 // statement, then Ruby does not consider it as a possible statement
15751 // that could emit a line event.
15752 if (statements != NULL && statements->body.size == 1) {
15753 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15754 }
15755
15756 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15757 }
15758
15759 // Here the lexer has returned the beginning of an embedded variable.
15760 // In that case we'll parse the variable and create an appropriate node
15761 // for it and then return that node. These kinds of parts look like:
15762 //
15763 // "aaa #{bbb} #@ccc ddd"
15764 // ^^^^^
15765 case PM_TOKEN_EMBVAR: {
15766 // Ruby disallows seeing encoding around interpolation in strings,
15767 // even though it is known at parse time.
15768 parser->explicit_encoding = NULL;
15769
15770 lex_state_set(parser, PM_LEX_STATE_BEG);
15771 parser_lex(parser);
15772
15773 pm_token_t operator = parser->previous;
15774 pm_node_t *variable;
15775
15776 switch (parser->current.type) {
15777 // In this case a back reference is being interpolated. We'll
15778 // create a global variable read node.
15779 case PM_TOKEN_BACK_REFERENCE:
15780 parser_lex(parser);
15781 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15782 break;
15783 // In this case an nth reference is being interpolated. We'll
15784 // create a global variable read node.
15785 case PM_TOKEN_NUMBERED_REFERENCE:
15786 parser_lex(parser);
15787 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15788 break;
15789 // In this case a global variable is being interpolated. We'll
15790 // create a global variable read node.
15791 case PM_TOKEN_GLOBAL_VARIABLE:
15792 parser_lex(parser);
15793 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15794 break;
15795 // In this case an instance variable is being interpolated.
15796 // We'll create an instance variable read node.
15797 case PM_TOKEN_INSTANCE_VARIABLE:
15798 parser_lex(parser);
15799 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15800 break;
15801 // In this case a class variable is being interpolated. We'll
15802 // create a class variable read node.
15803 case PM_TOKEN_CLASS_VARIABLE:
15804 parser_lex(parser);
15805 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15806 break;
15807 // We can hit here if we got an invalid token. In that case
15808 // we'll not attempt to lex this token and instead just return a
15809 // missing node.
15810 default:
15811 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15812 variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15813 break;
15814 }
15815
15816 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15817 }
15818 default:
15819 parser_lex(parser);
15820 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15821 return NULL;
15822 }
15823}
15824
15830static const uint8_t *
15831parse_operator_symbol_name(const pm_token_t *name) {
15832 switch (name->type) {
15833 case PM_TOKEN_TILDE:
15834 case PM_TOKEN_BANG:
15835 if (name->end[-1] == '@') return name->end - 1;
15837 default:
15838 return name->end;
15839 }
15840}
15841
15842static pm_node_t *
15843parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15844 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15845 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15846
15847 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15848 parser_lex(parser);
15849
15850 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15851 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15852
15853 return UP(symbol);
15854}
15855
15861static pm_node_t *
15862parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15863 const pm_token_t opening = parser->previous;
15864
15865 if (lex_mode->mode != PM_LEX_STRING) {
15866 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15867
15868 switch (parser->current.type) {
15869 case PM_CASE_OPERATOR:
15870 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15871 case PM_TOKEN_IDENTIFIER:
15872 case PM_TOKEN_CONSTANT:
15873 case PM_TOKEN_INSTANCE_VARIABLE:
15874 case PM_TOKEN_METHOD_NAME:
15875 case PM_TOKEN_CLASS_VARIABLE:
15876 case PM_TOKEN_GLOBAL_VARIABLE:
15877 case PM_TOKEN_NUMBERED_REFERENCE:
15878 case PM_TOKEN_BACK_REFERENCE:
15879 case PM_CASE_KEYWORD:
15880 parser_lex(parser);
15881 break;
15882 default:
15883 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15884 break;
15885 }
15886
15887 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15888 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15889 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15890
15891 return UP(symbol);
15892 }
15893
15894 if (lex_mode->as.string.interpolation) {
15895 // If we have the end of the symbol, then we can return an empty symbol.
15896 if (match1(parser, PM_TOKEN_STRING_END)) {
15897 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15898 parser_lex(parser);
15899 pm_token_t content = {
15900 .type = PM_TOKEN_STRING_CONTENT,
15901 .start = parser->previous.start,
15902 .end = parser->previous.start
15903 };
15904
15905 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15906 }
15907
15908 // Now we can parse the first part of the symbol.
15909 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15910
15911 // If we got a string part, then it's possible that we could transform
15912 // what looks like an interpolated symbol into a regular symbol.
15913 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15914 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15915 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15916
15917 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15918 }
15919
15920 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15921 if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15922
15923 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15924 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15925 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15926 }
15927 }
15928
15929 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15930 if (match1(parser, PM_TOKEN_EOF)) {
15931 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15932 } else {
15933 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15934 }
15935
15936 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15937 return UP(symbol);
15938 }
15939
15940 pm_token_t content;
15941 pm_string_t unescaped;
15942
15943 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15944 content = parser->current;
15945 unescaped = parser->current_string;
15946 parser_lex(parser);
15947
15948 // If we have two string contents in a row, then the content of this
15949 // symbol is split because of heredoc contents. This looks like:
15950 //
15951 // <<A; :'a
15952 // A
15953 // b'
15954 //
15955 // In this case, the best way we have to represent this is as an
15956 // interpolated string node, so that's what we'll do here.
15957 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15958 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15959 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15960 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15961
15962 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15963 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15964
15965 if (next_state != PM_LEX_STATE_NONE) {
15966 lex_state_set(parser, next_state);
15967 }
15968
15969 parser_lex(parser);
15970 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15971
15972 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15973 return UP(symbol);
15974 }
15975 } else {
15976 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15977 pm_string_shared_init(&unescaped, content.start, content.end);
15978 }
15979
15980 if (next_state != PM_LEX_STATE_NONE) {
15981 lex_state_set(parser, next_state);
15982 }
15983
15984 if (match1(parser, PM_TOKEN_EOF)) {
15985 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15986 } else {
15987 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15988 }
15989
15990 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15991}
15992
15997static PRISM_INLINE pm_node_t *
15998parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
15999 switch (parser->current.type) {
16000 case PM_CASE_OPERATOR:
16001 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
16002 case PM_CASE_KEYWORD:
16003 case PM_TOKEN_CONSTANT:
16004 case PM_TOKEN_IDENTIFIER:
16005 case PM_TOKEN_METHOD_NAME: {
16006 parser_lex(parser);
16007
16008 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16009 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16010 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16011
16012 return UP(symbol);
16013 }
16014 case PM_TOKEN_SYMBOL_BEGIN: {
16015 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16016 parser_lex(parser);
16017
16018 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16019 }
16020 default:
16021 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16022 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16023 }
16024}
16025
16032static PRISM_INLINE pm_node_t *
16033parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16034 switch (parser->current.type) {
16035 case PM_CASE_OPERATOR:
16036 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16037 case PM_CASE_KEYWORD:
16038 case PM_TOKEN_CONSTANT:
16039 case PM_TOKEN_IDENTIFIER:
16040 case PM_TOKEN_METHOD_NAME: {
16041 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16042 parser_lex(parser);
16043
16044 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16045 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16046 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16047
16048 return UP(symbol);
16049 }
16050 case PM_TOKEN_SYMBOL_BEGIN: {
16051 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16052 parser_lex(parser);
16053
16054 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16055 }
16056 case PM_TOKEN_BACK_REFERENCE:
16057 parser_lex(parser);
16058 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
16059 case PM_TOKEN_NUMBERED_REFERENCE:
16060 parser_lex(parser);
16061 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16062 case PM_TOKEN_GLOBAL_VARIABLE:
16063 parser_lex(parser);
16064 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
16065 default:
16066 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16067 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16068 }
16069}
16070
16075static pm_node_t *
16076parse_variable(pm_parser_t *parser) {
16077 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16078 int depth;
16079 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
16080
16081 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16082 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
16083 }
16084
16085 pm_scope_t *current_scope = parser->current_scope;
16086 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16087 if (is_numbered_param) {
16088 // When you use a numbered parameter, it implies the existence of
16089 // all of the locals that exist before it. For example, referencing
16090 // _2 means that _1 must exist. Therefore here we loop through all
16091 // of the possibilities and add them into the constant pool.
16092 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16093 for (uint8_t number = 1; number <= maximum; number++) {
16094 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16095 }
16096
16097 if (!match1(parser, PM_TOKEN_EQUAL)) {
16098 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16099 }
16100
16101 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
16102 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16103
16104 return node;
16105 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16106 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
16107 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16108
16109 return node;
16110 }
16111 }
16112
16113 return NULL;
16114}
16115
16119static pm_node_t *
16120parse_variable_call(pm_parser_t *parser) {
16121 pm_node_flags_t flags = 0;
16122
16123 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16124 pm_node_t *node = parse_variable(parser);
16125 if (node != NULL) return node;
16126 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16127 }
16128
16129 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16130 pm_node_flag_set(UP(node), flags);
16131
16132 return UP(node);
16133}
16134
16141parse_method_definition_name(pm_parser_t *parser) {
16142 switch (parser->current.type) {
16143 case PM_CASE_KEYWORD:
16144 case PM_TOKEN_CONSTANT:
16145 case PM_TOKEN_METHOD_NAME:
16146 parser_lex(parser);
16147 return parser->previous;
16148 case PM_TOKEN_IDENTIFIER:
16149 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
16150 parser_lex(parser);
16151 return parser->previous;
16152 case PM_CASE_OPERATOR:
16153 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16154 parser_lex(parser);
16155 return parser->previous;
16156 default:
16157 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
16158 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
16159 }
16160}
16161
16162static void
16163parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
16164 // Make a writable copy in the arena if the string isn't already writable.
16165 // We keep a mutable pointer to the arena memory so we can memmove into it
16166 // below without casting away const from the string's source field.
16167 uint8_t *writable;
16168
16169 if (string->type != PM_STRING_OWNED) {
16170 size_t length = pm_string_length(string);
16171 writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
16172 pm_string_constant_init(string, (const char *) writable, length);
16173 } else {
16174 writable = (uint8_t *) string->source;
16175 }
16176
16177 // Now get the bounds of the existing string. We'll use this as a
16178 // destination to move bytes into. We'll also use it for bounds checking
16179 // since we don't require that these strings be null terminated.
16180 size_t dest_length = pm_string_length(string);
16181 const uint8_t *source_cursor = writable;
16182 const uint8_t *source_end = source_cursor + dest_length;
16183
16184 // We're going to move bytes backward in the string when we get leading
16185 // whitespace, so we'll maintain a pointer to the current position in the
16186 // string that we're writing to.
16187 size_t trimmed_whitespace = 0;
16188
16189 // While we haven't reached the amount of common whitespace that we need to
16190 // trim and we haven't reached the end of the string, we'll keep trimming
16191 // whitespace. Trimming in this context means skipping over these bytes such
16192 // that they aren't copied into the new string.
16193 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16194 if (*source_cursor == '\t') {
16195 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16196 if (trimmed_whitespace > common_whitespace) break;
16197 } else {
16198 trimmed_whitespace++;
16199 }
16200
16201 source_cursor++;
16202 dest_length--;
16203 }
16204
16205 memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
16206 string->length = dest_length;
16207}
16208
16213static PRISM_INLINE bool
16214heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
16215 if (string_node->unescaped.length == 0) {
16216 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
16217 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
16218 }
16219 return false;
16220}
16221
16225static void
16226parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16227 // The next node should be dedented if it's the first node in the list or if
16228 // it follows a string node.
16229 bool dedent_next = true;
16230
16231 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16232 // keep around two indices: a read and a write.
16233 size_t write_index = 0;
16234
16235 pm_node_t *node;
16236 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16237 // We're not manipulating child nodes that aren't strings. In this case
16238 // we'll skip past it and indicate that the subsequent node should not
16239 // be dedented.
16240 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16241 nodes->nodes[write_index++] = node;
16242 dedent_next = false;
16243 continue;
16244 }
16245
16246 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16247 if (dedent_next) {
16248 parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
16249 }
16250
16251 if (heredoc_dedent_discard_string_node(parser, string_node)) {
16252 } else {
16253 nodes->nodes[write_index++] = node;
16254 }
16255
16256 // We always dedent the next node if it follows a string node.
16257 dedent_next = true;
16258 }
16259
16260 nodes->size = write_index;
16261}
16262
16266static pm_token_t
16267parse_strings_empty_content(const uint8_t *location) {
16268 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16269}
16270
16274static PRISM_INLINE pm_node_t *
16275parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16276 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16277 bool concating = false;
16278
16279 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16280 pm_node_t *node = NULL;
16281
16282 // Here we have found a string literal. We'll parse it and add it to
16283 // the list of strings.
16284 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16285 assert(lex_mode->mode == PM_LEX_STRING);
16286 bool lex_interpolation = lex_mode->as.string.interpolation;
16287 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16288
16289 pm_token_t opening = parser->current;
16290 parser_lex(parser);
16291
16292 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16293 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16294 // If we get here, then we have an end immediately after a
16295 // start. In that case we'll create an empty content token and
16296 // return an uninterpolated string.
16297 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16298 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16299
16300 pm_string_shared_init(&string->unescaped, content.start, content.end);
16301 node = UP(string);
16302 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16303 // If we get here, then we have an end of a label immediately
16304 // after a start. In that case we'll create an empty symbol
16305 // node.
16306 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
16307 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
16308 node = UP(symbol);
16309
16310 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16311 } else if (!lex_interpolation) {
16312 // If we don't accept interpolation then we expect the string to
16313 // start with a single string content node.
16314 pm_string_t unescaped;
16315 pm_token_t content;
16316
16317 if (match1(parser, PM_TOKEN_EOF)) {
16318 unescaped = PM_STRING_EMPTY;
16319 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
16320 } else {
16321 unescaped = parser->current_string;
16322 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16323 content = parser->previous;
16324 }
16325
16326 // It is unfortunately possible to have multiple string content
16327 // nodes in a row in the case that there's heredoc content in
16328 // the middle of the string, like this cursed example:
16329 //
16330 // <<-END+'b
16331 // a
16332 // END
16333 // c'+'d'
16334 //
16335 // In that case we need to switch to an interpolated string to
16336 // be able to contain all of the parts.
16337 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16338 pm_node_list_t parts = { 0 };
16339 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16340 pm_node_list_append(parser->arena, &parts, part);
16341
16342 do {
16343 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
16344 pm_node_list_append(parser->arena, &parts, part);
16345 parser_lex(parser);
16346 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16347
16348 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16349 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16350 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16351 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16352 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16353 } else if (match1(parser, PM_TOKEN_EOF)) {
16354 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16355 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16356 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16357 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16358 } else {
16359 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
16360 parser->previous.start = parser->previous.end;
16361 parser->previous.type = 0;
16362 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16363 }
16364 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16365 // In this case we've hit string content so we know the string
16366 // at least has something in it. We'll need to check if the
16367 // following token is the end (in which case we can return a
16368 // plain string) or if it's not then it has interpolation.
16369 pm_token_t content = parser->current;
16370 pm_string_t unescaped = parser->current_string;
16371 parser_lex(parser);
16372
16373 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16374 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16375 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16376
16377 // Kind of odd behavior, but basically if we have an
16378 // unterminated string and it ends in a newline, we back up one
16379 // character so that the error message is on the last line of
16380 // content in the string.
16381 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16382 const uint8_t *location = parser->previous.end;
16383 if (location > parser->start && location[-1] == '\n') location--;
16384 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
16385
16386 parser->previous.start = parser->previous.end;
16387 parser->previous.type = 0;
16388 }
16389 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16390 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16391 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16392 } else {
16393 // If we get here, then we have interpolation so we'll need
16394 // to create a string or symbol node with interpolation.
16395 pm_node_list_t parts = { 0 };
16396 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
16397 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16398 pm_node_list_append(parser->arena, &parts, part);
16399
16400 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16401 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16402 pm_node_list_append(parser->arena, &parts, part);
16403 }
16404 }
16405
16406 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16407 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16408 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16409 } else if (match1(parser, PM_TOKEN_EOF)) {
16410 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16411 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16412 } else {
16413 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16414 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16415 }
16416 }
16417 } else {
16418 // If we get here, then the first part of the string is not plain
16419 // string content, in which case we need to parse the string as an
16420 // interpolated string.
16421 pm_node_list_t parts = { 0 };
16422 pm_node_t *part;
16423
16424 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16425 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16426 pm_node_list_append(parser->arena, &parts, part);
16427 }
16428 }
16429
16430 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16431 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16432 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16433 } else if (match1(parser, PM_TOKEN_EOF)) {
16434 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16435 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16436 } else {
16437 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16438 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16439 }
16440 }
16441
16442 if (current == NULL) {
16443 // If the node we just parsed is a symbol node, then we can't
16444 // concatenate it with anything else, so we can now return that
16445 // node.
16446 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16447 return node;
16448 }
16449
16450 // If we don't already have a node, then it's fine and we can just
16451 // set the result to be the node we just parsed.
16452 current = node;
16453 } else {
16454 // Otherwise we need to check the type of the node we just parsed.
16455 // If it cannot be concatenated with the previous node, then we'll
16456 // need to add a syntax error.
16457 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16458 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16459 }
16460
16461 // If we haven't already created our container for concatenation,
16462 // we'll do that now.
16463 if (!concating) {
16464 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16465 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16466 }
16467
16468 concating = true;
16469 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16470 pm_interpolated_string_node_append(parser, container, current);
16471 current = UP(container);
16472 }
16473
16474 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
16475 }
16476 }
16477
16478 return current;
16479}
16480
16481#define PM_PARSE_PATTERN_SINGLE 0
16482#define PM_PARSE_PATTERN_TOP 1
16483#define PM_PARSE_PATTERN_MULTI 2
16484
16485static pm_node_t *
16486parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16487
16493static void
16494parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16495 // Skip this capture if it starts with an underscore.
16496 if (peek_at(parser, parser->start + location->start) == '_') return;
16497
16498 if (pm_constant_id_list_includes(captures, capture)) {
16499 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16500 } else {
16501 pm_constant_id_list_append(parser->arena, captures, capture);
16502 }
16503}
16504
16508static pm_node_t *
16509parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16510 // Now, if there are any :: operators that follow, parse them as constant
16511 // path nodes.
16512 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16513 pm_token_t delimiter = parser->previous;
16514 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16515 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16516 }
16517
16518 // If there is a [ or ( that follows, then this is part of a larger pattern
16519 // expression. We'll parse the inner pattern here, then modify the returned
16520 // inner pattern with our constant path attached.
16521 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16522 return node;
16523 }
16524
16525 pm_token_t opening;
16526 pm_token_t closing;
16527 pm_node_t *inner = NULL;
16528
16529 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16530 opening = parser->previous;
16531 accept1(parser, PM_TOKEN_NEWLINE);
16532
16533 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16534 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16535 accept1(parser, PM_TOKEN_NEWLINE);
16536 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16537 }
16538
16539 closing = parser->previous;
16540 } else {
16541 parser_lex(parser);
16542 opening = parser->previous;
16543 accept1(parser, PM_TOKEN_NEWLINE);
16544
16545 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16546 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16547 accept1(parser, PM_TOKEN_NEWLINE);
16548 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16549 }
16550
16551 closing = parser->previous;
16552 }
16553
16554 if (!inner) {
16555 // If there was no inner pattern, then we have something like Foo() or
16556 // Foo[]. In that case we'll create an array pattern with no requireds.
16557 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16558 }
16559
16560 // Now that we have the inner pattern, check to see if it's an array, find,
16561 // or hash pattern. If it is, then we'll attach our constant path to it if
16562 // it doesn't already have a constant. If it's not one of those node types
16563 // or it does have a constant, then we'll create an array pattern.
16564 switch (PM_NODE_TYPE(inner)) {
16565 case PM_ARRAY_PATTERN_NODE: {
16566 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16567
16568 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16569 PM_NODE_START_SET_NODE(pattern_node, node);
16570 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16571
16572 pattern_node->constant = node;
16573 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16574 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16575
16576 return UP(pattern_node);
16577 }
16578
16579 break;
16580 }
16581 case PM_FIND_PATTERN_NODE: {
16582 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16583
16584 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16585 PM_NODE_START_SET_NODE(pattern_node, node);
16586 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16587
16588 pattern_node->constant = node;
16589 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16590 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16591
16592 return UP(pattern_node);
16593 }
16594
16595 break;
16596 }
16597 case PM_HASH_PATTERN_NODE: {
16598 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16599
16600 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16601 PM_NODE_START_SET_NODE(pattern_node, node);
16602 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16603
16604 pattern_node->constant = node;
16605 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16606 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16607
16608 return UP(pattern_node);
16609 }
16610
16611 break;
16612 }
16613 default:
16614 break;
16615 }
16616
16617 // If we got here, then we didn't return one of the inner patterns by
16618 // attaching its constant. In this case we'll create an array pattern and
16619 // attach our constant to it.
16620 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16621 pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
16622 return UP(pattern_node);
16623}
16624
16628static pm_splat_node_t *
16629parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16630 assert(parser->previous.type == PM_TOKEN_USTAR);
16631 pm_token_t operator = parser->previous;
16632 pm_node_t *name = NULL;
16633
16634 // Rest patterns don't necessarily have a name associated with them. So we
16635 // will check for that here. If they do, then we'll add it to the local
16636 // table since this pattern will cause it to become a local variable.
16637 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16638 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16639
16640 int depth;
16641 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16642 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16643 }
16644
16645 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16646 name = UP(pm_local_variable_target_node_create(
16647 parser,
16648 &TOK2LOC(parser, &parser->previous),
16649 constant_id,
16650 (uint32_t) (depth == -1 ? 0 : depth)
16651 ));
16652 }
16653
16654 // Finally we can return the created node.
16655 return pm_splat_node_create(parser, &operator, name);
16656}
16657
16661static pm_node_t *
16662parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16663 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16664 parser_lex(parser);
16665
16666 pm_token_t operator = parser->previous;
16667 pm_node_t *value = NULL;
16668
16669 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16670 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16671 }
16672
16673 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16674 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16675
16676 int depth;
16677 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16678 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16679 }
16680
16681 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16682 value = UP(pm_local_variable_target_node_create(
16683 parser,
16684 &TOK2LOC(parser, &parser->previous),
16685 constant_id,
16686 (uint32_t) (depth == -1 ? 0 : depth)
16687 ));
16688 }
16689
16690 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16691}
16692
16697static bool
16698pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16699 ptrdiff_t length = end - start;
16700 if (length == 0) return false;
16701
16702 // First ensure that it starts with a valid identifier starting character.
16703 size_t width = char_is_identifier_start(parser, start, end - start);
16704 if (width == 0) return false;
16705
16706 // Next, ensure that it's not an uppercase character.
16707 if (parser->encoding_changed) {
16708 if (parser->encoding->isupper_char(start, length)) return false;
16709 } else {
16710 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16711 }
16712
16713 // Next, iterate through all of the bytes of the string to ensure that they
16714 // are all valid identifier characters.
16715 const uint8_t *cursor = start + width;
16716 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16717 return cursor == end;
16718}
16719
16724static pm_node_t *
16725parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16726 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16727 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16728 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16729
16730 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16731 int depth = -1;
16732
16733 if (pm_slice_is_valid_local(parser, start, end)) {
16734 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16735 } else {
16736 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16737
16738 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16739 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16740 }
16741 }
16742
16743 if (depth == -1) {
16744 pm_parser_local_add(parser, constant_id, start, end, 0);
16745 }
16746
16747 parse_pattern_capture(parser, captures, constant_id, value_loc);
16748 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16749 parser,
16750 value_loc,
16751 constant_id,
16752 (uint32_t) (depth == -1 ? 0 : depth)
16753 );
16754
16755 return UP(pm_implicit_node_create(parser, UP(target)));
16756}
16757
16762static void
16763parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16764 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16765 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16766 }
16767}
16768
16773parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16774 pm_node_list_t assocs = { 0 };
16775 pm_static_literals_t keys = { 0 };
16776 pm_node_t *rest = NULL;
16777
16778 switch (PM_NODE_TYPE(first_node)) {
16779 case PM_ASSOC_SPLAT_NODE:
16780 case PM_NO_KEYWORDS_PARAMETER_NODE:
16781 rest = first_node;
16782 break;
16783 case PM_INTERPOLATED_SYMBOL_NODE:
16784 case PM_SYMBOL_NODE: {
16785 if (pm_symbol_node_label_p(parser, first_node)) {
16786 if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
16787 pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16788 } else {
16789 parse_pattern_hash_key(parser, &keys, first_node);
16790 }
16791
16792 pm_node_t *value;
16793
16794 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16795 if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
16796 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16797 } else {
16798 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
16799 }
16800 } else {
16801 // Here we have a value for the first assoc in the list, so
16802 // we will parse it now.
16803 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16804 }
16805
16806 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16807 pm_node_list_append(parser->arena, &assocs, assoc);
16808 break;
16809 }
16810 }
16812 default: {
16813 // If we get anything else, then this is an error. For this we'll
16814 // create a missing node for the value and create an assoc node for
16815 // the first node in the list.
16816 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16817 pm_parser_err_node(parser, first_node, diag_id);
16818
16819 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16820 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16821
16822 pm_node_list_append(parser->arena, &assocs, assoc);
16823 break;
16824 }
16825 }
16826
16827 // If there are any other assocs, then we'll parse them now.
16828 while (accept1(parser, PM_TOKEN_COMMA)) {
16829 // Here we need to break to support trailing commas.
16830 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16831 // Trailing commas are not allowed to follow a rest pattern.
16832 if (rest != NULL) {
16833 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16834 }
16835
16836 break;
16837 }
16838
16839 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16840 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16841
16842 if (rest == NULL) {
16843 rest = assoc;
16844 } else {
16845 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16846 pm_node_list_append(parser->arena, &assocs, assoc);
16847 }
16848 } else {
16849 pm_node_t *key;
16850
16851 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16852 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16853
16854 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16855 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16856 } else if (!pm_symbol_node_label_p(parser, key)) {
16857 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16858 }
16859 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16860 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16861 } else {
16862 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16863
16864 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16865 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16866 }
16867
16868 parse_pattern_hash_key(parser, &keys, key);
16869 pm_node_t *value = NULL;
16870
16871 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16872 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16873 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16874 } else {
16875 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
16876 }
16877 } else {
16878 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16879 }
16880
16881 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16882
16883 if (rest != NULL) {
16884 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16885 }
16886
16887 pm_node_list_append(parser->arena, &assocs, assoc);
16888 }
16889 }
16890
16891 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16892 // assocs.nodes is arena-allocated; no explicit free needed.
16893
16894 pm_static_literals_free(&keys);
16895 return node;
16896}
16897
16901static pm_node_t *
16902parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16903 switch (parser->current.type) {
16904 case PM_TOKEN_IDENTIFIER:
16905 case PM_TOKEN_METHOD_NAME: {
16906 parser_lex(parser);
16907 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16908
16909 int depth;
16910 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16911 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16912 }
16913
16914 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16915 return UP(pm_local_variable_target_node_create(
16916 parser,
16917 &TOK2LOC(parser, &parser->previous),
16918 constant_id,
16919 (uint32_t) (depth == -1 ? 0 : depth)
16920 ));
16921 }
16922 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16923 pm_token_t opening = parser->current;
16924 parser_lex(parser);
16925
16926 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16927 // If we have an empty array pattern, then we'll just return a new
16928 // array pattern node.
16929 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16930 }
16931
16932 // Otherwise, we'll parse the inner pattern, then deal with it depending
16933 // on the type it returns.
16934 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16935
16936 accept1(parser, PM_TOKEN_NEWLINE);
16937 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16938 pm_token_t closing = parser->previous;
16939
16940 switch (PM_NODE_TYPE(inner)) {
16941 case PM_ARRAY_PATTERN_NODE: {
16942 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16943 if (pattern_node->opening_loc.length == 0) {
16944 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16945 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16946
16947 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16948 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16949
16950 return UP(pattern_node);
16951 }
16952
16953 break;
16954 }
16955 case PM_FIND_PATTERN_NODE: {
16956 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16957 if (pattern_node->opening_loc.length == 0) {
16958 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16959 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16960
16961 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16962 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16963
16964 return UP(pattern_node);
16965 }
16966
16967 break;
16968 }
16969 default:
16970 break;
16971 }
16972
16973 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16974 pm_array_pattern_node_requireds_append(parser->arena, node, inner);
16975 return UP(node);
16976 }
16977 case PM_TOKEN_BRACE_LEFT: {
16978 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16979 parser->pattern_matching_newlines = false;
16980
16982 pm_token_t opening = parser->current;
16983 parser_lex(parser);
16984
16985 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16986 // If we have an empty hash pattern, then we'll just return a new hash
16987 // pattern node.
16988 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16989 } else {
16990 pm_node_t *first_node;
16991
16992 switch (parser->current.type) {
16993 case PM_TOKEN_LABEL:
16994 parser_lex(parser);
16995 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16996 break;
16997 case PM_TOKEN_USTAR_STAR:
16998 first_node = parse_pattern_keyword_rest(parser, captures);
16999 break;
17000 case PM_TOKEN_STRING_BEGIN:
17001 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17002 break;
17003 default: {
17004 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
17005 parser_lex(parser);
17006
17007 first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
17008 break;
17009 }
17010 }
17011
17012 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17013
17014 accept1(parser, PM_TOKEN_NEWLINE);
17015 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
17016 pm_token_t closing = parser->previous;
17017
17018 PM_NODE_START_SET_TOKEN(parser, node, &opening);
17019 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
17020
17021 node->opening_loc = TOK2LOC(parser, &opening);
17022 node->closing_loc = TOK2LOC(parser, &closing);
17023 }
17024
17025 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17026 return UP(node);
17027 }
17028 case PM_TOKEN_UDOT_DOT:
17029 case PM_TOKEN_UDOT_DOT_DOT: {
17030 pm_token_t operator = parser->current;
17031 parser_lex(parser);
17032
17033 // Since we have a unary range operator, we need to parse the subsequent
17034 // expression as the right side of the range.
17035 switch (parser->current.type) {
17036 case PM_CASE_PRIMITIVE: {
17037 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17038 return UP(pm_range_node_create(parser, NULL, &operator, right));
17039 }
17040 default: {
17041 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17042 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17043 return UP(pm_range_node_create(parser, NULL, &operator, right));
17044 }
17045 }
17046 }
17047 case PM_CASE_PRIMITIVE: {
17048 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
17049
17050 // If we found a label, we need to immediately return to the caller.
17051 if (pm_symbol_node_label_p(parser, node)) return node;
17052
17053 // Call nodes (arithmetic operations) are not allowed in patterns
17054 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17055 pm_parser_err_node(parser, node, diag_id);
17056 return UP(pm_error_recovery_node_create_unexpected(parser, node));
17057 }
17058
17059 // Now that we have a primitive, we need to check if it's part of a range.
17060 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17061 pm_token_t operator = parser->previous;
17062
17063 // Now that we have the operator, we need to check if this is followed
17064 // by another expression. If it is, then we will create a full range
17065 // node. Otherwise, we'll create an endless range.
17066 switch (parser->current.type) {
17067 case PM_CASE_PRIMITIVE: {
17068 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17069 return UP(pm_range_node_create(parser, node, &operator, right));
17070 }
17071 default:
17072 return UP(pm_range_node_create(parser, node, &operator, NULL));
17073 }
17074 }
17075
17076 return node;
17077 }
17078 case PM_TOKEN_CARET: {
17079 parser_lex(parser);
17080 pm_token_t operator = parser->previous;
17081
17082 // At this point we have a pin operator. We need to check the subsequent
17083 // expression to determine if it's a variable or an expression.
17084 switch (parser->current.type) {
17085 case PM_TOKEN_IDENTIFIER: {
17086 parser_lex(parser);
17087 pm_node_t *variable = UP(parse_variable(parser));
17088
17089 if (variable == NULL) {
17090 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17091 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
17092 }
17093
17094 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17095 }
17096 case PM_TOKEN_INSTANCE_VARIABLE: {
17097 parser_lex(parser);
17098 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
17099
17100 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17101 }
17102 case PM_TOKEN_CLASS_VARIABLE: {
17103 parser_lex(parser);
17104 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17105
17106 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17107 }
17108 case PM_TOKEN_GLOBAL_VARIABLE: {
17109 parser_lex(parser);
17110 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17111
17112 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17113 }
17114 case PM_TOKEN_NUMBERED_REFERENCE: {
17115 parser_lex(parser);
17116 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17117
17118 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17119 }
17120 case PM_TOKEN_BACK_REFERENCE: {
17121 parser_lex(parser);
17122 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17123
17124 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17125 }
17126 case PM_TOKEN_PARENTHESIS_LEFT: {
17127 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17128 parser->pattern_matching_newlines = false;
17129
17130 pm_token_t lparen = parser->current;
17131 parser_lex(parser);
17132
17133 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17134 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17135
17136 accept1(parser, PM_TOKEN_NEWLINE);
17137 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
17138 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
17139 }
17140 default: {
17141 // If we get here, then we have a pin operator followed by something
17142 // not understood. We'll create a missing node and return that.
17143 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17144 pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17145 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17146 }
17147 }
17148 }
17149 case PM_TOKEN_UCOLON_COLON: {
17150 pm_token_t delimiter = parser->current;
17151 parser_lex(parser);
17152
17153 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17154 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17155
17156 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
17157 }
17158 case PM_TOKEN_CONSTANT: {
17159 pm_token_t constant = parser->current;
17160 parser_lex(parser);
17161
17162 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
17163 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17164 }
17165 default:
17166 pm_parser_err_current(parser, diag_id);
17167 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17168 }
17169}
17170
17171static bool
17172parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
17173 switch (PM_NODE_TYPE(node)) {
17174 case PM_LOCAL_VARIABLE_TARGET_NODE: {
17175 pm_parser_t *parser = (pm_parser_t *) data;
17176 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
17177 return false;
17178 }
17179 default:
17180 return true;
17181 }
17182}
17183
17188static void
17189parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
17190 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
17191}
17192
17197static pm_node_t *
17198parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17199 pm_node_t *node = first_node;
17200 bool alternation = false;
17201
17202 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
17203 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
17204 parse_pattern_alternation_error(parser, node);
17205 }
17206
17207 switch (parser->current.type) {
17208 case PM_TOKEN_IDENTIFIER:
17209 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17210 case PM_TOKEN_BRACE_LEFT:
17211 case PM_TOKEN_CARET:
17212 case PM_TOKEN_CONSTANT:
17213 case PM_TOKEN_UCOLON_COLON:
17214 case PM_TOKEN_UDOT_DOT:
17215 case PM_TOKEN_UDOT_DOT_DOT:
17216 case PM_CASE_PRIMITIVE: {
17217 if (!alternation) {
17218 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17219 } else {
17220 pm_token_t operator = parser->previous;
17221 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17222
17223 if (captures->size) parse_pattern_alternation_error(parser, right);
17224 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17225 }
17226
17227 break;
17228 }
17229 case PM_TOKEN_PARENTHESIS_LEFT:
17230 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17231 pm_token_t operator = parser->previous;
17232 pm_token_t opening = parser->current;
17233 parser_lex(parser);
17234
17235 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17236 accept1(parser, PM_TOKEN_NEWLINE);
17237 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
17238 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
17239
17240 if (!alternation) {
17241 node = right;
17242 } else {
17243 if (captures->size) parse_pattern_alternation_error(parser, right);
17244 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17245 }
17246
17247 break;
17248 }
17249 default: {
17250 pm_parser_err_current(parser, diag_id);
17251 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17252
17253 if (!alternation) {
17254 node = right;
17255 } else {
17256 if (captures->size) parse_pattern_alternation_error(parser, right);
17257 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
17258 }
17259
17260 break;
17261 }
17262 }
17263 }
17264
17265 // If we have an =>, then we are assigning this pattern to a variable.
17266 // In this case we should create an assignment node.
17267 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17268 pm_token_t operator = parser->previous;
17269 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17270
17271 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17272 int depth;
17273
17274 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17275 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17276 }
17277
17278 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
17279 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17280 parser,
17281 &TOK2LOC(parser, &parser->previous),
17282 constant_id,
17283 (uint32_t) (depth == -1 ? 0 : depth)
17284 );
17285
17286 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
17287 }
17288
17289 return node;
17290}
17291
17295static pm_node_t *
17296parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17297 pm_node_t *node = NULL;
17298
17299 bool leading_rest = false;
17300 bool trailing_rest = false;
17301
17302 switch (parser->current.type) {
17303 case PM_TOKEN_LABEL: {
17304 parser_lex(parser);
17305 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
17306 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
17307
17308 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17309 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17310 }
17311
17312 return node;
17313 }
17314 case PM_TOKEN_USTAR_STAR: {
17315 node = parse_pattern_keyword_rest(parser, captures);
17316 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17317
17318 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17319 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17320 }
17321
17322 return node;
17323 }
17324 case PM_TOKEN_STRING_BEGIN: {
17325 // We need special handling for string beginnings because they could
17326 // be dynamic symbols leading to hash patterns.
17327 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17328
17329 if (pm_symbol_node_label_p(parser, node)) {
17330 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17331
17332 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17333 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17334 }
17335
17336 return node;
17337 }
17338
17339 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17340 break;
17341 }
17342 case PM_TOKEN_USTAR: {
17343 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17344 parser_lex(parser);
17345 node = UP(parse_pattern_rest(parser, captures));
17346 leading_rest = true;
17347 break;
17348 }
17349 }
17351 default:
17352 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17353 break;
17354 }
17355
17356 // If we got a dynamic label symbol, then we need to treat it like the
17357 // beginning of a hash pattern.
17358 if (pm_symbol_node_label_p(parser, node)) {
17359 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17360 }
17361
17362 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17363 // If we have a comma, then we are now parsing either an array pattern
17364 // or a find pattern. We need to parse all of the patterns, put them
17365 // into a big list, and then determine which type of node we have.
17366 pm_node_list_t nodes = { 0 };
17367 pm_node_list_append(parser->arena, &nodes, node);
17368
17369 // Gather up all of the patterns into the list.
17370 while (accept1(parser, PM_TOKEN_COMMA)) {
17371 // Break early here in case we have a trailing comma.
17372 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17373 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17374 pm_node_list_append(parser->arena, &nodes, node);
17375 trailing_rest = true;
17376 break;
17377 }
17378
17379 if (accept1(parser, PM_TOKEN_USTAR)) {
17380 node = UP(parse_pattern_rest(parser, captures));
17381
17382 // If we have already parsed a splat pattern, then this is an
17383 // error. We will continue to parse the rest of the patterns,
17384 // but we will indicate it as an error.
17385 if (trailing_rest) {
17386 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17387 }
17388
17389 trailing_rest = true;
17390 } else {
17391 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17392 }
17393
17394 pm_node_list_append(parser->arena, &nodes, node);
17395 }
17396
17397 // If the first pattern and the last pattern are rest patterns, then we
17398 // will call this a find pattern, regardless of how many rest patterns
17399 // are in between because we know we already added the appropriate
17400 // errors. Otherwise we will create an array pattern.
17401 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17402 node = UP(pm_find_pattern_node_create(parser, &nodes));
17403
17404 if (nodes.size == 2) {
17405 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17406 }
17407 } else {
17408 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17409
17410 if (leading_rest && trailing_rest) {
17411 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17412 }
17413 }
17414
17415 // nodes.nodes is arena-allocated; no explicit free needed.
17416 } else if (leading_rest) {
17417 // Otherwise, if we parsed a single splat pattern, then we know we have
17418 // an array pattern, so we can go ahead and create that node.
17419 node = UP(pm_array_pattern_node_rest_create(parser, node));
17420 }
17421
17422 return node;
17423}
17424
17430static PRISM_INLINE void
17431parse_negative_numeric(pm_node_t *node) {
17432 switch (PM_NODE_TYPE(node)) {
17433 case PM_INTEGER_NODE: {
17434 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17435 cast->base.location.start--;
17436 cast->base.location.length++;
17437 cast->value.negative = true;
17438 break;
17439 }
17440 case PM_FLOAT_NODE: {
17441 pm_float_node_t *cast = (pm_float_node_t *) node;
17442 cast->base.location.start--;
17443 cast->base.location.length++;
17444 cast->value = -cast->value;
17445 break;
17446 }
17447 case PM_RATIONAL_NODE: {
17448 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17449 cast->base.location.start--;
17450 cast->base.location.length++;
17451 cast->numerator.negative = true;
17452 break;
17453 }
17454 case PM_IMAGINARY_NODE:
17455 node->location.start--;
17456 node->location.length++;
17457 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17458 break;
17459 default:
17460 assert(false && "unreachable");
17461 break;
17462 }
17463}
17464
17470static void
17471pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17472 switch (diag_id) {
17473 case PM_ERR_HASH_KEY: {
17474 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
17475 break;
17476 }
17477 case PM_ERR_HASH_VALUE:
17478 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17479 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17480 break;
17481 }
17482 case PM_ERR_UNARY_RECEIVER: {
17483 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
17484 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17485 break;
17486 }
17487 case PM_ERR_UNARY_DISALLOWED:
17488 case PM_ERR_EXPECT_ARGUMENT: {
17489 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17490 break;
17491 }
17492 default:
17493 pm_parser_err_previous(parser, diag_id);
17494 break;
17495 }
17496}
17497
17501static void
17502parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17503#define CONTEXT_NONE 0
17504#define CONTEXT_THROUGH_ENSURE 1
17505#define CONTEXT_THROUGH_ELSE 2
17506
17507 pm_context_node_t *context_node = parser->current_context;
17508 int context = CONTEXT_NONE;
17509
17510 while (context_node != NULL) {
17511 switch (context_node->context) {
17512 case PM_CONTEXT_BEGIN_RESCUE:
17513 case PM_CONTEXT_BLOCK_RESCUE:
17514 case PM_CONTEXT_CLASS_RESCUE:
17515 case PM_CONTEXT_DEF_RESCUE:
17516 case PM_CONTEXT_LAMBDA_RESCUE:
17517 case PM_CONTEXT_MODULE_RESCUE:
17518 case PM_CONTEXT_SCLASS_RESCUE:
17519 case PM_CONTEXT_DEFINED:
17520 case PM_CONTEXT_RESCUE_MODIFIER:
17521 // These are the good cases. We're allowed to have a retry here.
17522 return;
17523 case PM_CONTEXT_CLASS:
17524 case PM_CONTEXT_DEF:
17525 case PM_CONTEXT_DEF_PARAMS:
17526 case PM_CONTEXT_MAIN:
17527 case PM_CONTEXT_MODULE:
17528 case PM_CONTEXT_PREEXE:
17529 case PM_CONTEXT_SCLASS:
17530 // These are the bad cases. We're not allowed to have a retry in
17531 // these contexts.
17532 if (context == CONTEXT_NONE) {
17533 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17534 } else if (context == CONTEXT_THROUGH_ENSURE) {
17535 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17536 } else if (context == CONTEXT_THROUGH_ELSE) {
17537 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17538 }
17539 return;
17540 case PM_CONTEXT_BEGIN_ELSE:
17541 case PM_CONTEXT_BLOCK_ELSE:
17542 case PM_CONTEXT_CLASS_ELSE:
17543 case PM_CONTEXT_DEF_ELSE:
17544 case PM_CONTEXT_LAMBDA_ELSE:
17545 case PM_CONTEXT_MODULE_ELSE:
17546 case PM_CONTEXT_SCLASS_ELSE:
17547 // These are also bad cases, but with a more specific error
17548 // message indicating the else.
17549 context = CONTEXT_THROUGH_ELSE;
17550 break;
17551 case PM_CONTEXT_BEGIN_ENSURE:
17552 case PM_CONTEXT_BLOCK_ENSURE:
17553 case PM_CONTEXT_CLASS_ENSURE:
17554 case PM_CONTEXT_DEF_ENSURE:
17555 case PM_CONTEXT_LAMBDA_ENSURE:
17556 case PM_CONTEXT_MODULE_ENSURE:
17557 case PM_CONTEXT_SCLASS_ENSURE:
17558 // These are also bad cases, but with a more specific error
17559 // message indicating the ensure.
17560 context = CONTEXT_THROUGH_ENSURE;
17561 break;
17562 case PM_CONTEXT_NONE:
17563 // This case should never happen.
17564 assert(false && "unreachable");
17565 break;
17566 case PM_CONTEXT_BEGIN:
17567 case PM_CONTEXT_BLOCK_BRACES:
17568 case PM_CONTEXT_BLOCK_KEYWORDS:
17569 case PM_CONTEXT_BLOCK_PARAMETERS:
17570 case PM_CONTEXT_CASE_IN:
17571 case PM_CONTEXT_CASE_WHEN:
17572 case PM_CONTEXT_DEFAULT_PARAMS:
17573 case PM_CONTEXT_ELSE:
17574 case PM_CONTEXT_ELSIF:
17575 case PM_CONTEXT_EMBEXPR:
17576 case PM_CONTEXT_FOR_INDEX:
17577 case PM_CONTEXT_FOR:
17578 case PM_CONTEXT_IF:
17579 case PM_CONTEXT_LAMBDA_BRACES:
17580 case PM_CONTEXT_LAMBDA_DO_END:
17581 case PM_CONTEXT_LOOP_PREDICATE:
17582 case PM_CONTEXT_MULTI_TARGET:
17583 case PM_CONTEXT_PARENS:
17584 case PM_CONTEXT_POSTEXE:
17585 case PM_CONTEXT_PREDICATE:
17586 case PM_CONTEXT_TERNARY:
17587 case PM_CONTEXT_UNLESS:
17588 case PM_CONTEXT_UNTIL:
17589 case PM_CONTEXT_WHILE:
17590 // In these contexts we should continue walking up the list of
17591 // contexts.
17592 break;
17593 }
17594
17595 context_node = context_node->prev;
17596 }
17597
17598#undef CONTEXT_NONE
17599#undef CONTEXT_ENSURE
17600#undef CONTEXT_ELSE
17601}
17602
17606static void
17607parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17608 pm_context_node_t *context_node = parser->current_context;
17609
17610 while (context_node != NULL) {
17611 switch (context_node->context) {
17612 case PM_CONTEXT_DEF:
17613 case PM_CONTEXT_DEF_PARAMS:
17614 case PM_CONTEXT_DEFINED:
17615 case PM_CONTEXT_DEF_ENSURE:
17616 case PM_CONTEXT_DEF_RESCUE:
17617 case PM_CONTEXT_DEF_ELSE:
17618 // These are the good cases. We're allowed to have a block exit
17619 // in these contexts.
17620 return;
17621 case PM_CONTEXT_CLASS:
17622 case PM_CONTEXT_CLASS_ENSURE:
17623 case PM_CONTEXT_CLASS_RESCUE:
17624 case PM_CONTEXT_CLASS_ELSE:
17625 case PM_CONTEXT_MAIN:
17626 case PM_CONTEXT_MODULE:
17627 case PM_CONTEXT_MODULE_ENSURE:
17628 case PM_CONTEXT_MODULE_RESCUE:
17629 case PM_CONTEXT_MODULE_ELSE:
17630 case PM_CONTEXT_SCLASS:
17631 case PM_CONTEXT_SCLASS_RESCUE:
17632 case PM_CONTEXT_SCLASS_ENSURE:
17633 case PM_CONTEXT_SCLASS_ELSE:
17634 // These are the bad cases. We're not allowed to have a retry in
17635 // these contexts.
17636 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17637 return;
17638 case PM_CONTEXT_NONE:
17639 // This case should never happen.
17640 assert(false && "unreachable");
17641 break;
17642 case PM_CONTEXT_BEGIN:
17643 case PM_CONTEXT_BEGIN_ELSE:
17644 case PM_CONTEXT_BEGIN_ENSURE:
17645 case PM_CONTEXT_BEGIN_RESCUE:
17646 case PM_CONTEXT_BLOCK_BRACES:
17647 case PM_CONTEXT_BLOCK_KEYWORDS:
17648 case PM_CONTEXT_BLOCK_ELSE:
17649 case PM_CONTEXT_BLOCK_ENSURE:
17650 case PM_CONTEXT_BLOCK_PARAMETERS:
17651 case PM_CONTEXT_BLOCK_RESCUE:
17652 case PM_CONTEXT_CASE_IN:
17653 case PM_CONTEXT_CASE_WHEN:
17654 case PM_CONTEXT_DEFAULT_PARAMS:
17655 case PM_CONTEXT_ELSE:
17656 case PM_CONTEXT_ELSIF:
17657 case PM_CONTEXT_EMBEXPR:
17658 case PM_CONTEXT_FOR_INDEX:
17659 case PM_CONTEXT_FOR:
17660 case PM_CONTEXT_IF:
17661 case PM_CONTEXT_LAMBDA_BRACES:
17662 case PM_CONTEXT_LAMBDA_DO_END:
17663 case PM_CONTEXT_LAMBDA_ELSE:
17664 case PM_CONTEXT_LAMBDA_ENSURE:
17665 case PM_CONTEXT_LAMBDA_RESCUE:
17666 case PM_CONTEXT_LOOP_PREDICATE:
17667 case PM_CONTEXT_MULTI_TARGET:
17668 case PM_CONTEXT_PARENS:
17669 case PM_CONTEXT_POSTEXE:
17670 case PM_CONTEXT_PREDICATE:
17671 case PM_CONTEXT_PREEXE:
17672 case PM_CONTEXT_RESCUE_MODIFIER:
17673 case PM_CONTEXT_TERNARY:
17674 case PM_CONTEXT_UNLESS:
17675 case PM_CONTEXT_UNTIL:
17676 case PM_CONTEXT_WHILE:
17677 // In these contexts we should continue walking up the list of
17678 // contexts.
17679 break;
17680 }
17681
17682 context_node = context_node->prev;
17683 }
17684}
17685
17690static PRISM_INLINE bool
17691pm_call_node_command_p(const pm_call_node_t *node) {
17692 return (
17693 (node->opening_loc.length == 0) &&
17694 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
17695 (node->arguments != NULL || node->block != NULL)
17696 );
17697}
17698
17704static bool
17705pm_command_call_value_p(const pm_node_t *node) {
17706 switch (PM_NODE_TYPE(node)) {
17707 case PM_CALL_NODE: {
17708 const pm_call_node_t *call = (const pm_call_node_t *) node;
17709
17710 // Command-style calls (e.g., foo bar, obj.foo bar).
17711 // Attribute writes (e.g., a.b = 1) are not commands.
17712 if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
17713 return true;
17714 }
17715
17716 // A `!` or `not` prefix wrapping a command call (e.g.,
17717 // `!foo bar`, `not foo bar`) is also a command-call value.
17718 if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
17719 return pm_command_call_value_p(call->receiver);
17720 }
17721
17722 return false;
17723 }
17724 case PM_SUPER_NODE: {
17725 const pm_super_node_t *cast = (const pm_super_node_t *) node;
17726 return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
17727 }
17728 case PM_YIELD_NODE: {
17729 const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
17730 return cast->lparen_loc.length == 0 && cast->arguments != NULL;
17731 }
17732 case PM_RESCUE_MODIFIER_NODE:
17733 return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
17734 case PM_DEF_NODE: {
17735 const pm_def_node_t *cast = (const pm_def_node_t *) node;
17736 if (cast->equal_loc.length > 0 && cast->body != NULL) {
17737 const pm_node_t *body = cast->body;
17738 if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
17739 body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1];
17740 }
17741 return pm_command_call_value_p(body);
17742 }
17743 return false;
17744 }
17745 default:
17746 return false;
17747 }
17748}
17749
17756static bool
17757pm_block_call_p(const pm_node_t *node) {
17758 while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17759 const pm_call_node_t *call = (const pm_call_node_t *) node;
17760 if (call->opening_loc.length > 0) return false;
17761
17762 // Root: command with do-block (e.g., `foo bar do end`).
17763 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
17764 return true;
17765 }
17766
17767 // Walk up the receiver chain (e.g., `foo bar do end.baz`).
17768 if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
17769 node = call->receiver;
17770 continue;
17771 }
17772
17773 return false;
17774 }
17775
17776 return false;
17777}
17778
17783static pm_node_t *
17784parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
17785 size_t opening_newline_index = token_newline_index(parser);
17786 parser_lex(parser);
17787
17788 pm_token_t case_keyword = parser->previous;
17789 pm_node_t *predicate = NULL;
17790
17791 pm_node_list_t current_block_exits = { 0 };
17792 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17793
17794 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17795 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17796 predicate = NULL;
17797 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
17798 predicate = NULL;
17799 } else if (!token_begins_expression_p(parser->current.type)) {
17800 predicate = NULL;
17801 } else {
17802 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
17803 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17804 }
17805
17806 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
17807 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17808 parser_lex(parser);
17809 pop_block_exits(parser, previous_block_exits);
17810 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17811 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
17812 }
17813
17814 /* At this point we can create a case node, though we don't yet know if it
17815 * is a case-in or case-when node. */
17816 pm_node_t *node;
17817
17818 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17819 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
17820 pm_static_literals_t literals = { 0 };
17821
17822 /* At this point we've seen a when keyword, so we know this is a
17823 * case-when node. We will continue to parse the when nodes until we hit
17824 * the end of the list. */
17825 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17826 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17827 parser_lex(parser);
17828
17829 pm_token_t when_keyword = parser->previous;
17830 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
17831
17832 do {
17833 if (accept1(parser, PM_TOKEN_USTAR)) {
17834 pm_token_t operator = parser->previous;
17835 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17836
17837 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
17838 pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
17839
17840 if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
17841 } else {
17842 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
17843 pm_when_node_conditions_append(parser->arena, when_node, condition);
17844
17845 /* If we found a missing node, then this is a syntax error
17846 * and we should stop looping. */
17847 if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
17848
17849 /* If this is a string node, then we need to mark it as
17850 * frozen because when clause strings are frozen. */
17851 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17852 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
17853 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
17854 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17855 }
17856
17857 pm_when_clause_static_literals_add(parser, &literals, condition);
17858 }
17859 } while (accept1(parser, PM_TOKEN_COMMA));
17860
17861 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17862 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17863 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17864 }
17865 } else {
17866 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
17867 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17868 }
17869
17870 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17871 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
17872 if (statements != NULL) {
17873 pm_when_node_statements_set(when_node, statements);
17874 }
17875 }
17876
17877 pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
17878 }
17879
17880 /* If we didn't parse any conditions (in or when) then we need to
17881 * indicate that we have an error. */
17882 if (case_node->conditions.size == 0) {
17883 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17884 }
17885
17886 pm_static_literals_free(&literals);
17887 node = UP(case_node);
17888 } else {
17889 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
17890
17891 /* If this is a case-match node (i.e., it is a pattern matching case
17892 * statement) then we must have a predicate. */
17893 if (predicate == NULL) {
17894 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
17895 }
17896
17897 /* At this point we expect that we're parsing a case-in node. We will
17898 * continue to parse the in nodes until we hit the end of the list. */
17899 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
17900 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17901
17902 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17903 parser->pattern_matching_newlines = true;
17904
17905 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
17906 parser->command_start = false;
17907 parser_lex(parser);
17908
17909 pm_token_t in_keyword = parser->previous;
17910
17911 pm_constant_id_list_t captures = { 0 };
17912 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
17913
17914 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17915
17916 /* Since we're in the top-level of the case-in node we need to
17917 * check for guard clauses in the form of `if` or `unless`
17918 * statements. */
17919 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
17920 pm_token_t keyword = parser->previous;
17921 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
17922 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
17923 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
17924 pm_token_t keyword = parser->previous;
17925 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
17926 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
17927 }
17928
17929 /* Now we need to check for the terminator of the in node's pattern.
17930 * It can be a newline or semicolon optionally followed by a `then`
17931 * keyword. */
17932 pm_token_t then_keyword = { 0 };
17933 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17934 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17935 then_keyword = parser->previous;
17936 }
17937 } else {
17938 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17939 then_keyword = parser->previous;
17940 }
17941
17942 /* Now we can actually parse the statements associated with the in
17943 * node. */
17944 pm_statements_node_t *statements;
17945 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17946 statements = NULL;
17947 } else {
17948 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
17949 }
17950
17951 /* Now that we have the full pattern and statements, we can create
17952 * the node and attach it to the case node. */
17953 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
17954 pm_case_match_node_condition_append(parser->arena, case_node, condition);
17955 }
17956
17957 /* If we didn't parse any conditions (in or when) then we need to
17958 * indicate that we have an error. */
17959 if (case_node->conditions.size == 0) {
17960 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17961 }
17962
17963 node = UP(case_node);
17964 }
17965
17966 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
17967 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
17968 pm_token_t else_keyword = parser->previous;
17969 pm_else_node_t *else_node;
17970
17971 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
17972 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
17973 } else {
17974 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
17975 }
17976
17977 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17978 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
17979 } else {
17980 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
17981 }
17982 }
17983
17984 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17985 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
17986
17987 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17988 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
17989 } else {
17990 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
17991 }
17992
17993 pop_block_exits(parser, previous_block_exits);
17994 return node;
17995}
17996
/**
 * Parse a class definition. The first parser_lex call below consumes a token
 * that is then recorded as the `class` keyword, so callers are expected to
 * invoke this with `class` as the current token.
 *
 * Two forms are handled:
 *   - `class << expression ... end` produces a singleton class node.
 *   - `class Name [< superclass] ... end` produces a class node.
 *
 * In both forms, if the body is followed by `rescue` or `ensure`, the
 * statements are replaced by the result of parse_rescues_implicit_begin.
 *
 * flags: only the PM_PARSE_ACCEPTS_DO_BLOCK bit is forwarded to nested
 *        expression parses (some of them additionally force
 *        PM_PARSE_ACCEPTS_COMMAND_CALL).
 * depth: forwarded as depth + 1 to every nested parse call.
 */
18001 static pm_node_t *
18002parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
18003 size_t opening_newline_index = token_newline_index(parser);
18004 parser_lex(parser);
18005
18006 pm_token_t class_keyword = parser->previous;
18007 pm_do_loop_stack_push(parser, false);
18008
18009 pm_node_list_t current_block_exits = { 0 };
18010 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18011
      /* Singleton form: `class << expression`. This branch always returns. */
18012 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18013 pm_token_t operator = parser->previous;
18014 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18015
18016 pm_parser_scope_push(parser, true);
      /* The expression must be followed by a newline or semicolon; anything
       * else is reported against the current token. */
18017 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18018 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
18019 }
18020
      /* Only parse a body when the next token is not one that ends or
       * continues the implicit begin (rescue/ensure/else/end). */
18021 pm_node_t *statements = NULL;
18022 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18023 pm_accepts_block_stack_push(parser, true);
18024 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18025 pm_accepts_block_stack_pop(parser);
18026 }
18027
      /* NOTE(review): only rescue/ensure are routed to
       * parse_rescues_implicit_begin here, whereas parse_def and parse_module
       * also route `else` (match3) - confirm this difference is intended. */
18028 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18029 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18030 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18031 } else {
18032 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18033 }
18034
18035 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18036
      /* The locals are read from the current scope, so they have to be
       * collected before that scope is popped. */
18037 pm_constant_id_list_t locals;
18038 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18039
18040 pm_parser_scope_pop(parser);
18041 pm_do_loop_stack_pop(parser);
18042
      /* NOTE(review): this path calls flush_block_exits while the regular
       * class path below calls pop_block_exits; the difference is defined by
       * those helpers and is not visible here. */
18043 flush_block_exits(parser, previous_block_exits);
18044 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18045 }
18046
      /* Regular form. `name` is the last token consumed while parsing the
       * constant path; it is reported if it is not a constant. */
18047 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18048 pm_token_t name = parser->previous;
18049 if (name.type != PM_TOKEN_CONSTANT) {
18050 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18051 }
18052
      /* inheritance_operator stays zeroed unless a `<` is seen; its `start`
       * field is used below to tell the two cases apart. */
18053 pm_token_t inheritance_operator = { 0 };
18054 pm_node_t *superclass;
18055
18056 if (match1(parser, PM_TOKEN_LESS)) {
18057 inheritance_operator = parser->current;
18058 lex_state_set(parser, PM_LEX_STATE_BEG);
18059
18060 parser->command_start = true;
18061 parser_lex(parser);
18062
18063 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18064 } else {
18065 superclass = NULL;
18066 }
18067
18068 pm_parser_scope_push(parser, true);
18069
      /* With a superclass the newline/semicolon goes through expect2 (which
       * carries an error id); without one it is merely accepted if present. */
18070 if (inheritance_operator.start != NULL) {
18071 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18072 } else {
18073 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18074 }
18075 pm_node_t *statements = NULL;
18076
18077 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18078 pm_accepts_block_stack_push(parser, true);
18079 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18080 pm_accepts_block_stack_pop(parser);
18081 }
18082
18083 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18084 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18085 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18086 } else {
18087 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18088 }
18089
18090 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18091
      /* When context_def_p reports true, the class is flagged with
       * PM_ERR_CLASS_IN_METHOD; parsing still continues and a node is built. */
18092 if (context_def_p(parser)) {
18093 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18094 }
18095
18096 pm_constant_id_list_t locals;
18097 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18098
18099 pm_parser_scope_pop(parser);
18100 pm_do_loop_stack_pop(parser);
18101
      /* Anything other than a constant read or constant path is an invalid
       * class name: report it and, unless it already is an error recovery
       * node, wrap it in one. */
18102 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18103 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18104 if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18105 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18106 }
18107 }
18108
18109 pop_block_exits(parser, previous_block_exits);
18110 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18111}
18112
/**
 * Parse a method definition. The current token on entry is recorded as the
 * `def` keyword. The function handles, in order:
 *
 *   1. the method name, with an optional receiver (`def recv.name`,
 *      `def recv::name`, or `def (expression).name`);
 *   2. the parameter list, either parenthesized, bare, or absent;
 *   3. the body, either endless (`def name = expression`, optionally followed
 *      by a rescue modifier) or regular (statements, optional
 *      rescue/ensure/else clauses, and a closing `end`).
 *
 * It returns the def node built from those pieces.
 *
 * binding_power: only consulted when parser->version is below
 *                PM_OPTIONS_VERSION_CRUBY_4_0, to decide whether an endless
 *                body may be a command call.
 * flags:         source of the PM_PARSE_ACCEPTS_DO_BLOCK and
 *                PM_PARSE_ACCEPTS_COMMAND_CALL bits forwarded to nested parses.
 * depth:         forwarded as depth + 1 to every nested parse call.
 */
18116 static pm_node_t *
18117parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
18118 pm_node_list_t current_block_exits = { 0 };
18119 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18120
18121 pm_token_t def_keyword = parser->current;
18122 size_t opening_newline_index = token_newline_index(parser);
18123
      /* receiver and operator stay NULL/zeroed unless a receiver form is
       * parsed below. name is assigned by every branch of the first switch. */
18124 pm_node_t *receiver = NULL;
18125 pm_token_t operator = { 0 };
18126 pm_token_t name;
18127
18128 /* This context is necessary for lexing `...` in a bare params correctly. It
18129 * must be pushed before lexing the first param, so it is here. */
18130 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18131 parser_lex(parser);
18132
18133 /* This will be false if the method name is not a valid identifier but could
18134 * be followed by an operator. */
18135 bool valid_name = true;
18136
      /* Dispatch on the token that follows `def`. Every branch pushes exactly
       * one new scope (popped near the end of this function) and assigns
       * `name`. */
18137 switch (parser->current.type) {
18138 case PM_CASE_OPERATOR:
18139 pm_parser_scope_push(parser, true);
18140 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18141 parser_lex(parser);
18142
18143 name = parser->previous;
18144 break;
18145 case PM_TOKEN_IDENTIFIER: {
18146 parser_lex(parser);
18147
      /* An identifier followed by `.` or `::` is a receiver; otherwise the
       * identifier itself is the method name. */
18148 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18149 receiver = parse_variable_call(parser);
18150
18151 pm_parser_scope_push(parser, true);
18152 lex_state_set(parser, PM_LEX_STATE_FNAME);
18153 parser_lex(parser);
18154
18155 operator = parser->previous;
18156 name = parse_method_definition_name(parser);
18157 } else {
18158 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18159 pm_parser_scope_push(parser, true);
18160
18161 name = parser->previous;
18162 }
18163
18164 break;
18165 }
18166 case PM_TOKEN_INSTANCE_VARIABLE:
18167 case PM_TOKEN_CLASS_VARIABLE:
18168 case PM_TOKEN_GLOBAL_VARIABLE:
18169 valid_name = false;
      /* Intentional fallthrough: variable tokens share the handling below.
       * They are acceptable as receivers, but valid_name == false makes them
       * an error when used as the method name itself. */
18171 case PM_TOKEN_CONSTANT:
18172 case PM_TOKEN_KEYWORD_NIL:
18173 case PM_TOKEN_KEYWORD_SELF:
18174 case PM_TOKEN_KEYWORD_TRUE:
18175 case PM_TOKEN_KEYWORD_FALSE:
18176 case PM_TOKEN_KEYWORD___FILE__:
18177 case PM_TOKEN_KEYWORD___LINE__:
18178 case PM_TOKEN_KEYWORD___ENCODING__: {
18179 pm_parser_scope_push(parser, true);
18180 parser_lex(parser);
18181
18182 pm_token_t identifier = parser->previous;
18183
18184 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18185 lex_state_set(parser, PM_LEX_STATE_FNAME);
18186 parser_lex(parser);
18187 operator = parser->previous;
18188
      /* Build the receiver node that corresponds to the token type. */
18189 switch (identifier.type) {
18190 case PM_TOKEN_CONSTANT:
18191 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18192 break;
18193 case PM_TOKEN_INSTANCE_VARIABLE:
18194 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18195 break;
18196 case PM_TOKEN_CLASS_VARIABLE:
18197 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18198 break;
18199 case PM_TOKEN_GLOBAL_VARIABLE:
18200 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18201 break;
18202 case PM_TOKEN_KEYWORD_NIL:
18203 receiver = UP(pm_nil_node_create(parser, &identifier));
18204 break;
18205 case PM_TOKEN_KEYWORD_SELF:
18206 receiver = UP(pm_self_node_create(parser, &identifier));
18207 break;
18208 case PM_TOKEN_KEYWORD_TRUE:
18209 receiver = UP(pm_true_node_create(parser, &identifier));
18210 break;
18211 case PM_TOKEN_KEYWORD_FALSE:
18212 receiver = UP(pm_false_node_create(parser, &identifier));
18213 break;
18214 case PM_TOKEN_KEYWORD___FILE__:
18215 receiver = UP(pm_source_file_node_create(parser, &identifier));
18216 break;
18217 case PM_TOKEN_KEYWORD___LINE__:
18218 receiver = UP(pm_source_line_node_create(parser, &identifier));
18219 break;
18220 case PM_TOKEN_KEYWORD___ENCODING__:
18221 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18222 break;
      /* The cases above cover every enclosing case label, so this should not
       * be reached; if it were, receiver would stay NULL. */
18223 default:
18224 break;
18225 }
18226
18227 name = parse_method_definition_name(parser);
18228 } else {
18229 if (!valid_name) {
18230 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
18231 }
18232
18233 name = identifier;
18234 }
18235 break;
18236 }
18237 case PM_TOKEN_PARENTHESIS_LEFT: {
18238 /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
18239 * expression of this parenthesis should not be processed under this
18240 * context. Thus, the context is popped here. */
18241 context_pop(parser);
18242 parser_lex(parser);
18243
18244 pm_token_t lparen = parser->previous;
18245 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18246
18247 accept1(parser, PM_TOKEN_NEWLINE);
18248 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18249 pm_token_t rparen = parser->previous;
18250
18251 lex_state_set(parser, PM_LEX_STATE_FNAME);
18252 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18253
18254 operator = parser->previous;
18255 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18256
18257 /* `PM_CONTEXT_DEF_PARAMS` is pushed again for the same reason as
18258 * described above. */
18259 pm_parser_scope_push(parser, true);
18260 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18261 name = parse_method_definition_name(parser);
18262 break;
18263 }
18264 default:
18265 pm_parser_scope_push(parser, true);
18266 name = parse_method_definition_name(parser);
18267 break;
18268 }
18269
      /* These lparen/rparen describe the parameter list (the ones declared in
       * the parenthesized-receiver case above are local to that case). They
       * stay zeroed when the parameters are not parenthesized. */
18270 pm_token_t lparen = { 0 };
18271 pm_token_t rparen = { 0 };
18272 pm_parameters_node_t *params;
18273
      /* Parse the parameter list. Each branch assigns `params` and pops one
       * context (the PM_CONTEXT_DEF_PARAMS pushed earlier). */
18274 bool accept_endless_def = true;
18275 switch (parser->current.type) {
18276 case PM_TOKEN_PARENTHESIS_LEFT: {
18277 parser_lex(parser);
18278 lparen = parser->previous;
18279
18280 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18281 params = NULL;
18282 } else {
18283 /* https://bugs.ruby-lang.org/issues/19107 */
18284 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18285 params = parse_parameters(
18286 parser,
18287 PM_BINDING_POWER_DEFINED,
18288 true,
18289 allow_trailing_comma,
18290 true,
18291 true,
18292 false,
18293 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18294 (uint16_t) (depth + 1)
18295 );
18296 }
18297
18298 lex_state_set(parser, PM_LEX_STATE_BEG);
18299 parser->command_start = true;
18300
18301 context_pop(parser);
      /* If the closing parenthesis is missing, report it and turn
       * parser->previous into a zero-width token of type 0 located at its own
       * end, so that rparen below records that placeholder. */
18302 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18303 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
18304 parser->previous.start = parser->previous.end;
18305 parser->previous.type = 0;
18306 }
18307
18308 rparen = parser->previous;
18309 break;
18310 }
18311 case PM_CASE_PARAMETER: {
18312 /* If we're about to lex a label, we need to add the label state to
18313 * make sure the next newline is ignored. */
18314 if (parser->current.type == PM_TOKEN_LABEL) {
18315 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18316 }
18317
18318 params = parse_parameters(
18319 parser,
18320 PM_BINDING_POWER_DEFINED,
18321 false,
18322 false,
18323 true,
18324 true,
18325 false,
18326 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18327 (uint16_t) (depth + 1)
18328 );
18329
18330 /* Reject `def * = 1` and similar. We have to specifically check for
18331 * them because they create ambiguity with optional arguments. */
18332 accept_endless_def = false;
18333
18334 context_pop(parser);
18335 break;
18336 }
18337 default: {
18338 params = NULL;
18339 context_pop(parser);
18340 break;
18341 }
18342 }
18343
      /* equal is only set for an endless def and end_keyword only for a
       * regular def; the other one stays zeroed. */
18344 pm_node_t *statements = NULL;
18345 pm_token_t equal = { 0 };
18346 pm_token_t end_keyword = { 0 };
18347
      /* `def name = expression`: endless method definition. The checks below
       * only record errors; parsing of the body continues regardless. */
18348 if (accept1(parser, PM_TOKEN_EQUAL)) {
18349 if (token_is_setter_name(&name)) {
18350 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18351 }
18352 if (!accept_endless_def) {
18353 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18354 }
      /* An endless def whose innermost context is PM_CONTEXT_DEFAULT_PARAMS
       * directly inside PM_CONTEXT_BLOCK_PARAMETERS is reported with a
       * diagnostic anchored at the `def` keyword. */
18355 if (
18356 parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
18357 parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
18358 ) {
18359 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18360 }
18361 equal = parser->previous;
18362
18363 context_push(parser, PM_CONTEXT_DEF);
18364 pm_do_loop_stack_push(parser, false);
18365 statements = UP(pm_statements_node_create(parser));
18366
18367 uint8_t allow_flags;
18368 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18369 allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
18370 } else {
18371 /* Allow `def foo = puts "Hello"` but not
18372 * `private def foo = puts "Hello"` */
18373 allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
18374 }
18375
18376 /* Inside a def body, we push true onto the accepts_block_stack so that
18377 * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
18378 * for primary-level constructs, not commands). During command argument
18379 * parsing, the stack is pushed to false, causing `do` to be lexed as
18380 * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
18381 * def body and instead left for the outer context. */
18382 pm_accepts_block_stack_push(parser, true);
18383 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18384 pm_accepts_block_stack_pop(parser);
18385
18386 /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
18387 * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
18388 * intentionally not caught here — it should bubble up to the outer
18389 * context (e.g., `private def f = puts "Hello" do end` where the block
18390 * attaches to `private`). */
18391 if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
      /* The block is still parsed so its tokens are consumed, but it is only
       * used as the location of the error and is not attached to any node
       * here. */
18392 pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
18393 pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
18394 }
18395
18396 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18397 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18398
18399 pm_token_t rescue_keyword = parser->previous;
18400
18401 /* In the Ruby grammar, the rescue value of an endless method
18402 * command excludes and/or and in/=>. */
18403 pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18404 context_pop(parser);
18405
18406 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18407 }
18408
18409 /* A nested endless def whose body is a command call (e.g.,
18410 * `def f = def g = foo bar`) is a command assignment and cannot appear
18411 * as a def body. */
18412 if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
18413 PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18414 }
18415
18416 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18417 pm_do_loop_stack_pop(parser);
18418 context_pop(parser);
18419 } else {
      /* Regular def. Without a parenthesized parameter list (lparen still
       * zeroed) the newline/semicolon goes through expect2 with an error id;
       * with parentheses it is merely accepted if present. */
18420 if (lparen.start == NULL) {
18421 lex_state_set(parser, PM_LEX_STATE_BEG);
18422 parser->command_start = true;
18423 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18424 } else {
18425 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18426 }
18427
      /* NOTE(review): accepts_block_stack is pushed with true here and again
       * around parse_statements below; each push has its own matching pop. */
18428 pm_accepts_block_stack_push(parser, true);
18429 pm_do_loop_stack_push(parser, false);
18430
18431 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18432 pm_accepts_block_stack_push(parser, true);
18433 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18434 pm_accepts_block_stack_pop(parser);
18435 }
18436
18437 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18438 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18439 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18440 } else {
18441 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18442 }
18443
18444 pm_accepts_block_stack_pop(parser);
18445 pm_do_loop_stack_pop(parser);
18446
18447 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18448 end_keyword = parser->previous;
18449 }
18450
      /* Collect the locals from the scope pushed while parsing the name, then
       * pop that scope. */
18451 pm_constant_id_list_t locals;
18452 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18453 pm_parser_scope_pop(parser);
18454
18455 /* If the final character is `@` as is the case when defining methods to
18456 * override the unary operators, we should ignore the @ in the same way we
18457 * do for symbols. */
18458 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
18459
18460 flush_block_exits(parser, previous_block_exits);
18461
18462 return UP(pm_def_node_create(
18463 parser,
18464 name_id,
18465 &name,
18466 receiver,
18467 params,
18468 statements,
18469 &locals,
18470 &def_keyword,
18471 NTOK2PTR(operator),
18472 NTOK2PTR(lparen),
18473 NTOK2PTR(rparen),
18474 NTOK2PTR(equal),
18475 NTOK2PTR(end_keyword)
18476 ));
18477}
18478
/**
 * Parse a module definition. The first parser_lex call below consumes a token
 * that is then recorded as the `module` keyword, so callers are expected to
 * invoke this with `module` as the current token.
 *
 * The module name is parsed as an expression and then extended with any
 * trailing `::Constant` segments. The body may be followed by
 * rescue/ensure/else clauses, in which case the statements are replaced by
 * the result of parse_rescues_implicit_begin. Returns a module node; if the
 * name parsed to an error recovery node, a module node without locals or
 * body is returned immediately.
 *
 * flags: only the PM_PARSE_ACCEPTS_DO_BLOCK bit is forwarded to the name
 *        expression parse.
 * depth: forwarded as depth + 1 to every nested parse call.
 */
18482 static pm_node_t *
18483parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
18484 pm_node_list_t current_block_exits = { 0 };
18485 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18486
18487 size_t opening_newline_index = token_newline_index(parser);
18488 parser_lex(parser);
18489 pm_token_t module_keyword = parser->previous;
18490
18491 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
18492 pm_token_t name;
18493
18494 /* If we can recover from a syntax error that occurred while parsing the
18495 * name of the module, then we'll handle that here. */
18496 if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18497 pop_block_exits(parser, previous_block_exits);
18498
      /* A zero-width token of type 0, placed at the end of the previous
       * token, stands in for both the missing name and the missing `end`. */
18499 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18500 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
18501 }
18502
      /* Fold any remaining `::Constant` segments into the constant path, using
       * the path parsed so far as the parent of each new segment. */
18503 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
18504 pm_token_t double_colon = parser->previous;
18505
18506 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18507 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
18508 }
18509
18510 /* Here we retrieve the name of the module. If it wasn't a constant, then
18511 * it's possible that `module foo` was passed, which is a syntax error. We
18512 * handle that here as well. */
18513 name = parser->previous;
18514 if (name.type != PM_TOKEN_CONSTANT) {
18515 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
18516 }
18517
      /* A name that is neither a constant read, a constant path, nor already
       * an error recovery node is wrapped in an error recovery node. No
       * additional diagnostic is added at this point. */
18518 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18519 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18520 }
18521
18522 pm_parser_scope_push(parser, true);
18523 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
18524 pm_node_t *statements = NULL;
18525
      /* Only parse a body when the next token is not one that ends or
       * continues the implicit begin (rescue/ensure/else/end). */
18526 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18527 pm_accepts_block_stack_push(parser, true);
18528 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
18529 pm_accepts_block_stack_pop(parser);
18530 }
18531
18532 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18533 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18534 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
18535 } else {
18536 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
18537 }
18538
      /* The locals are read from the current scope, so they are collected
       * before the scope is popped. Here the scope is popped before the
       * closing `end` is expected. */
18539 pm_constant_id_list_t locals;
18540 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18541
18542 pm_parser_scope_pop(parser);
18543 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
18544
      /* When context_def_p reports true, the module is flagged with
       * PM_ERR_MODULE_IN_METHOD; a node is still built and returned. */
18545 if (context_def_p(parser)) {
18546 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
18547 }
18548
18549 pop_block_exits(parser, previous_block_exits);
18550
18551 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
18552}
18553
/**
 * Parse a list literal whose elements are strings that may contain
 * interpolation, and return it as an array node. Judging by the
 * PM_ERR_LIST_W_UPPER_* diagnostics this is presumably the `%W[...]` literal
 * - confirm at the call site.
 *
 * The token consumed by the first parser_lex call is recorded as the opening
 * of the array. Elements are accumulated in `current` until a
 * PM_TOKEN_WORDS_SEP (or the end of the list) is reached: a single piece of
 * string content stays a string node, while several pieces or any embedded
 * variable/expression produce an interpolated string node.
 *
 * depth: forwarded as depth + 1 to parse_string_part.
 */
18557 static pm_node_t *
18558parse_string_array(pm_parser_t *parser, uint16_t depth) {
18559 parser_lex(parser);
18560 pm_token_t opening = parser->previous;
18561 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18562
18563 /* This is the current node that we are parsing that will be added to the
18564 * list of elements. */
18565 pm_node_t *current = NULL;
18566
      /* Loop until the closing delimiter or EOF. Every branch of the switch
       * lexes at least one token (directly or through parse_string_part). */
18567 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18568 switch (parser->current.type) {
18569 case PM_TOKEN_WORDS_SEP: {
18570 /* Reset the explicit encoding if we hit a separator since each
18571 * element can have its own encoding. */
18572 parser->explicit_encoding = NULL;
18573
18574 if (current == NULL) {
18575 /* If we hit a separator before we have any content, then we
18576 * don't need to do anything. */
18577 } else {
18578 /* If we hit a separator after we've hit content, then we
18579 * need to append that content to the list and reset the
18580 * current node. */
18581 pm_array_node_elements_append(parser->arena, array, current);
18582 current = NULL;
18583 }
18584
18585 parser_lex(parser);
18586 break;
18587 }
18588 case PM_TOKEN_STRING_CONTENT: {
      /* The string node is built from the current token and flagged with the
       * result of parse_unescaped_encoding before that token is consumed. */
18589 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18590 pm_node_flag_set(string, parse_unescaped_encoding(parser));
18591 parser_lex(parser);
18592
18593 if (current == NULL) {
18594 /* If we hit content and the current node is NULL, then this
18595 * is the first string content we've seen. In that case
18596 * we're going to create a new string node and set that to
18597 * the current. */
18598 current = string;
18599 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18600 /* If we hit string content and the current node is an
18601 * interpolated string, then we need to append the string
18602 * content to the list of child nodes. */
18603 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
18604 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18605 /* If we hit string content and the current node is a string
18606 * node, then we need to convert the current node into an
18607 * interpolated string and add the string content to the
18608 * list of child nodes. */
18609 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18610 pm_interpolated_string_node_append(parser, interpolated, current);
18611 pm_interpolated_string_node_append(parser, interpolated, string);
18612 current = UP(interpolated);
18613 } else {
18614 assert(false && "unreachable");
18615 }
18616
18617 break;
18618 }
18619 case PM_TOKEN_EMBVAR: {
18620 if (current == NULL) {
18621 /* If we hit an embedded variable and the current node is
18622 * NULL, then this is the start of a new string. We'll set
18623 * the current node to a new interpolated string. */
18624 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18625 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18626 /* If we hit an embedded variable and the current node is a
18627 * string node, then we'll convert the current into an
18628 * interpolated string and add the string node to the list
18629 * of parts. */
18630 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18631 pm_interpolated_string_node_append(parser, interpolated, current);
18632 current = UP(interpolated);
18633 } else {
18634 /* If we hit an embedded variable and the current node is an
18635 * interpolated string, then we'll just add the embedded
18636 * variable. */
18637 }
18638
      /* At this point `current` is an interpolated string node in every
       * branch above, which is what the cast below relies on. */
18639 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18640 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18641 break;
18642 }
18643 case PM_TOKEN_EMBEXPR_BEGIN: {
18644 if (current == NULL) {
18645 /* If we hit an embedded expression and the current node is
18646 * NULL, then this is the start of a new string. We'll set
18647 * the current node to a new interpolated string. */
18648 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18649 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18650 /* If we hit an embedded expression and the current node is
18651 * a string node, then we'll convert the current into an
18652 * interpolated string and add the string node to the list
18653 * of parts. */
18654 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18655 pm_interpolated_string_node_append(parser, interpolated, current);
18656 current = UP(interpolated);
18657 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18658 /* If we hit an embedded expression and the current node is
18659 * an interpolated string, then we'll just continue on. */
18660 } else {
18661 assert(false && "unreachable");
18662 }
18663
18664 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18665 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18666 break;
18667 }
      /* Any other token: expect1 is called with the element error id, and the
       * following parser_lex moves past the token so the loop keeps making
       * progress. */
18668 default:
18669 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
18670 parser_lex(parser);
18671 break;
18672 }
18673 }
18674
18675 /* If we have a current node, then we need to append it to the list. */
18676 if (current) {
18677 pm_array_node_elements_append(parser->arena, array, current);
18678 }
18679
      /* On EOF the list is unterminated: the error is reported against the
       * opening token and a zero-width closing token of type 0 is synthesized
       * at the end of the previous token. Otherwise the current token is used
       * as the closing token. */
18680 pm_token_t closing = parser->current;
18681 if (match1(parser, PM_TOKEN_EOF)) {
18682 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
18683 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18684 } else {
18685 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
18686 }
18687
18688 pm_array_node_close_set(parser, array, &closing);
18689 return UP(array);
18690}
18691
18695static pm_node_t *
18696parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
18697 parser_lex(parser);
18698 pm_token_t opening = parser->previous;
18699 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18700
18701 /* This is the current node that we are parsing that will be added to the
18702 * list of elements. */
18703 pm_node_t *current = NULL;
18704
18705 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18706 switch (parser->current.type) {
18707 case PM_TOKEN_WORDS_SEP: {
18708 if (current == NULL) {
18709 /* If we hit a separator before we have any content, then we
18710 * don't need to do anything. */
18711 } else {
18712 /* If we hit a separator after we've hit content, then we
18713 * need to append that content to the list and reset the
18714 * current node. */
18715 pm_array_node_elements_append(parser->arena, array, current);
18716 current = NULL;
18717 }
18718
18719 parser_lex(parser);
18720 break;
18721 }
18722 case PM_TOKEN_STRING_CONTENT: {
18723 if (current == NULL) {
18724 /* If we hit content and the current node is NULL, then this
18725 * is the first string content we've seen. In that case
18726 * we're going to create a new string node and set that to
18727 * the current. */
18728 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
18729 parser_lex(parser);
18730 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18731 /* If we hit string content and the current node is an
18732 * interpolated string, then we need to append the string
18733 * content to the list of child nodes. */
18734 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18735 parser_lex(parser);
18736
18737 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
18738 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18739 /* If we hit string content and the current node is a symbol
18740 * node, then we need to convert the current node into an
18741 * interpolated string and add the string content to the
18742 * list of child nodes. */
18743 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
18744 pm_token_t content = {
18745 .type = PM_TOKEN_STRING_CONTENT,
18746 .start = parser->start + cast->value_loc.start,
18747 .end = parser->start + cast->value_loc.start + cast->value_loc.length
18748 };
18749
18750 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
18751 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
18752 parser_lex(parser);
18753
18754 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18755 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
18756 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
18757
18758 current = UP(interpolated);
18759 } else {
18760 assert(false && "unreachable");
18761 }
18762
18763 break;
18764 }
18765 case PM_TOKEN_EMBVAR: {
18766 bool start_location_set = false;
18767 if (current == NULL) {
18768 /* If we hit an embedded variable and the current node is
18769 * NULL, then this is the start of a new string. We'll set
18770 * the current node to a new interpolated string. */
18771 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18772 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18773 /* If we hit an embedded variable and the current node is a
18774 * string node, then we'll convert the current into an
18775 * interpolated string and add the string node to the list
18776 * of parts. */
18777 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18778
18779 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18780 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18781 PM_NODE_START_SET_NODE(interpolated, current);
18782 start_location_set = true;
18783 current = UP(interpolated);
18784 } else {
18785 /* If we hit an embedded variable and the current node is an
18786 * interpolated string, then we'll just add the embedded
18787 * variable. */
18788 }
18789
18790 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18791 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18792 if (!start_location_set) {
18793 PM_NODE_START_SET_NODE(current, part);
18794 }
18795 break;
18796 }
18797 case PM_TOKEN_EMBEXPR_BEGIN: {
18798 bool start_location_set = false;
18799 if (current == NULL) {
18800 /* If we hit an embedded expression and the current node is
18801 * NULL, then this is the start of a new string. We'll set
18802 * the current node to a new interpolated string. */
18803 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18804 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18805 /* If we hit an embedded expression and the current node is
18806 * a string node, then we'll convert the current into an
18807 * interpolated string and add the string node to the list
18808 * of parts. */
18809 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18810
18811 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18812 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18813 PM_NODE_START_SET_NODE(interpolated, current);
18814 start_location_set = true;
18815 current = UP(interpolated);
18816 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18817 /* If we hit an embedded expression and the current node is
18818 * an interpolated string, then we'll just continue on. */
18819 } else {
18820 assert(false && "unreachable");
18821 }
18822
18823 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18824 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18825 if (!start_location_set) {
18826 PM_NODE_START_SET_NODE(current, part);
18827 }
18828 break;
18829 }
18830 default:
18831 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
18832 parser_lex(parser);
18833 break;
18834 }
18835 }
18836
18837 /* If we have a current node, then we need to append it to the list. */
18838 if (current) {
18839 pm_array_node_elements_append(parser->arena, array, current);
18840 }
18841
18842 pm_token_t closing = parser->current;
18843 if (match1(parser, PM_TOKEN_EOF)) {
18844 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
18845 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18846 } else {
18847 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
18848 }
18849 pm_array_node_close_set(parser, array, &closing);
18850
18851 return UP(array);
18852}
18853
18858static pm_node_t *
18859parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
18860 pm_token_t opening = parser->current;
18861 pm_node_flags_t paren_flags = 0;
18862
18863 pm_node_list_t current_block_exits = { 0 };
18864 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18865
18866 parser_lex(parser);
18867 while (true) {
18868 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18869 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18870 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18871 break;
18872 }
18873 }
18874
18875 /* If this is the end of the file or we match a right parenthesis, then we
18876 * have an empty parentheses node, and we can immediately return. */
18877 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18878 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18879 pop_block_exits(parser, previous_block_exits);
18880 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
18881 }
18882
18883 /* Otherwise, we're going to parse the first statement in the list of
18884 * statements within the parentheses. */
18885 pm_accepts_block_stack_push(parser, true);
18886 context_push(parser, PM_CONTEXT_PARENS);
18887 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18888 context_pop(parser);
18889
18890 /* Determine if this statement is followed by a terminator. In the case of a
18891 * single statement, this is fine. But in the case of multiple statements
18892 * it's required. */
18893 bool terminator_found = false;
18894
18895 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18896 terminator_found = true;
18897 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18898 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18899 terminator_found = true;
18900 }
18901
18902 if (terminator_found) {
18903 while (true) {
18904 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18905 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18906 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18907 break;
18908 }
18909 }
18910 }
18911
18912 /* If we hit a right parenthesis, then we're done parsing the parentheses
18913 * node, and we can check which kind of node we should return. */
18914 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18915 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18916 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18917 }
18918
18919 parser_lex(parser);
18920 pm_accepts_block_stack_pop(parser);
18921 pop_block_exits(parser, previous_block_exits);
18922
18923 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18924 /* If we have a single statement and are ending on a right
18925 * parenthesis, then we need to check if this is possibly a multiple
18926 * target node. */
18927 pm_multi_target_node_t *multi_target;
18928
18929 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
18930 multi_target = (pm_multi_target_node_t *) statement;
18931 } else {
18932 multi_target = pm_multi_target_node_create(parser);
18933 pm_multi_target_node_targets_append(parser, multi_target, statement);
18934 }
18935
18936 multi_target->lparen_loc = TOK2LOC(parser, &opening);
18937 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
18938 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
18939 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
18940
18941 pm_node_t *result;
18942 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18943 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18944 accept1(parser, PM_TOKEN_NEWLINE);
18945 } else {
18946 result = UP(multi_target);
18947 }
18948
18949 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18950 /* All set, this is explicitly allowed by the parent context. */
18951 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18952 /* All set, we're inside a for loop and we're parsing multiple
18953 * targets. */
18954 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18955 /* Multi targets are not allowed when it's not a statement
18956 * level. */
18957 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18958 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18959 /* Multi targets must be followed by an equal sign in order to
18960 * be valid (or a right parenthesis if they are nested). */
18961 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18962 }
18963
18964 return result;
18965 }
18966
18967 /* If we have a single statement and are ending on a right parenthesis
18968 * and we didn't return a multiple assignment node, then we can return a
18969 * regular parentheses node now. */
18970 pm_statements_node_t *statements = pm_statements_node_create(parser);
18971 pm_statements_node_body_append(parser, statements, statement, true);
18972
18973 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
18974 }
18975
18976 /* If we have more than one statement in the set of parentheses, then we are
18977 * going to parse all of them as a list of statements. We'll do that here.
18978 */
18979 context_push(parser, PM_CONTEXT_PARENS);
18980 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18981
18982 pm_statements_node_t *statements = pm_statements_node_create(parser);
18983 pm_statements_node_body_append(parser, statements, statement, true);
18984
18985 /* If we didn't find a terminator and we didn't find a right parenthesis,
18986 * then this is a syntax error. */
18987 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18988 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18989 }
18990
18991 /* Parse each statement within the parentheses. */
18992 while (true) {
18993 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18994 pm_statements_node_body_append(parser, statements, node, true);
18995
18996 /* If we're recovering from a syntax error, then we need to stop parsing
18997 * the statements now. */
18998 if (parser->recovering) {
18999 /* If this is the level of context where the recovery has happened,
19000 * then we can mark the parser as done recovering. */
19001 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
19002 break;
19003 }
19004
19005 /* If we couldn't parse an expression at all, then we need to bail out
19006 * of the loop. */
19007 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
19008
19009 /* If we successfully parsed a statement, then we are going to need a
19010 * terminator to delimit them. */
19011 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19012 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
19013 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
19014 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19015 break;
19016 } else if (!match1(parser, PM_TOKEN_EOF)) {
19017 /* If we're at the end of the file, then we're going to add an error
19018 * after this for the ) anyway. */
19019 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
19020 }
19021 }
19022
19023 context_pop(parser);
19024 pm_accepts_block_stack_pop(parser);
19025 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19026
19027 /* When we're parsing multi targets, we allow them to be followed by a right
19028 * parenthesis if they are at the statement level. This is only possible if
19029 * they are the final statement in a parentheses. We need to explicitly
19030 * reject that here. */
19031 {
19032 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
19033
19034 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
19035 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
19036 pm_multi_target_node_targets_append(parser, multi_target, statement);
19037
19038 statement = UP(multi_target);
19039 statements->body.nodes[statements->body.size - 1] = statement;
19040 }
19041
19042 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
19043 const uint8_t *offset = parser->start + PM_NODE_END(statement);
19044 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
19045 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
19046
19047 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
19048 statements->body.nodes[statements->body.size - 1] = statement;
19049
19050 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
19051 }
19052 }
19053
19054 pop_block_exits(parser, previous_block_exits);
19055 pm_void_statements_check(parser, statements, true);
19056 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
19057}
19058
19062static PRISM_INLINE pm_node_t *
19063parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
19064 switch (parser->current.type) {
19065 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
19066 parser_lex(parser);
19067
19068 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
19069 pm_accepts_block_stack_push(parser, true);
19070 bool parsed_bare_hash = false;
19071
19072 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
19073 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
19074
19075 // Handle the case where we don't have a comma and we have a
19076 // newline followed by a right bracket.
19077 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19078 break;
19079 }
19080
19081 // Ensure that we have a comma between elements in the array.
19082 if (array->elements.size > 0) {
19083 if (accept1(parser, PM_TOKEN_COMMA)) {
19084 // If there was a comma but we also accepts a newline,
19085 // then this is a syntax error.
19086 if (accepted_newline) {
19087 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
19088 }
19089 } else {
19090 // If there was no comma, then we need to add a syntax
19091 // error.
19092 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
19093 parser->previous.start = parser->previous.end;
19094 parser->previous.type = 0;
19095 }
19096 }
19097
19098 // If we have a right bracket immediately following a comma,
19099 // this is allowed since it's a trailing comma. In this case we
19100 // can break out of the loop.
19101 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
19102
19103 pm_node_t *element;
19104
19105 if (accept1(parser, PM_TOKEN_USTAR)) {
19106 pm_token_t operator = parser->previous;
19107 pm_node_t *expression = NULL;
19108
19109 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
19110 pm_parser_scope_forwarding_positionals_check(parser, &operator);
19111 } else {
19112 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19113 }
19114
19115 element = UP(pm_splat_node_create(parser, &operator, expression));
19116 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
19117 if (parsed_bare_hash) {
19118 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
19119 }
19120
19121 element = UP(pm_keyword_hash_node_create(parser));
19122 pm_static_literals_t hash_keys = { 0 };
19123
19124 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
19125 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19126 }
19127
19128 pm_static_literals_free(&hash_keys);
19129 parsed_bare_hash = true;
19130 } else {
19131 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
19132
19133 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
19134 if (parsed_bare_hash) {
19135 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
19136 }
19137
19138 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
19139 pm_static_literals_t hash_keys = { 0 };
19140 pm_hash_key_static_literals_add(parser, &hash_keys, element);
19141
19142 pm_token_t operator = { 0 };
19143 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
19144 operator = parser->previous;
19145 }
19146
19147 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
19148 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
19149 pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
19150
19151 element = UP(hash);
19152 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19153 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19154 }
19155
19156 pm_static_literals_free(&hash_keys);
19157 parsed_bare_hash = true;
19158 }
19159 }
19160
19161 pm_array_node_elements_append(parser->arena, array, element);
19162 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
19163 }
19164
19165 accept1(parser, PM_TOKEN_NEWLINE);
19166
19167 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19168 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
19169 parser->previous.start = parser->previous.end;
19170 parser->previous.type = 0;
19171 }
19172
19173 pm_array_node_close_set(parser, array, &parser->previous);
19174 pm_accepts_block_stack_pop(parser);
19175
19176 return UP(array);
19177 }
19178 case PM_TOKEN_PARENTHESIS_LEFT:
19179 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
19180 return parse_parentheses(parser, binding_power, depth);
19181 case PM_TOKEN_BRACE_LEFT: {
19182 // If we were passed a current_hash_keys via the parser, then that
19183 // means we're already parsing a hash and we want to share the set
19184 // of hash keys with this inner hash we're about to parse for the
19185 // sake of warnings. We'll set it to NULL after we grab it to make
19186 // sure subsequent expressions don't use it. Effectively this is a
19187 // way of getting around passing it to every call to
19188 // parse_expression.
19189 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
19190 parser->current_hash_keys = NULL;
19191
19192 pm_accepts_block_stack_push(parser, true);
19193 parser_lex(parser);
19194
19195 pm_token_t opening = parser->previous;
19196 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
19197
19198 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
19199 if (current_hash_keys != NULL) {
19200 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
19201 } else {
19202 pm_static_literals_t hash_keys = { 0 };
19203 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
19204 pm_static_literals_free(&hash_keys);
19205 }
19206
19207 accept1(parser, PM_TOKEN_NEWLINE);
19208 }
19209
19210 pm_accepts_block_stack_pop(parser);
19211 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
19212 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
19213
19214 return UP(node);
19215 }
19216 case PM_TOKEN_CHARACTER_LITERAL: {
19217 pm_node_t *node = UP(pm_string_node_create_current_string(
19218 parser,
19219 &(pm_token_t) {
19220 .type = PM_TOKEN_STRING_BEGIN,
19221 .start = parser->current.start,
19222 .end = parser->current.start + 1
19223 },
19224 &(pm_token_t) {
19225 .type = PM_TOKEN_STRING_CONTENT,
19226 .start = parser->current.start + 1,
19227 .end = parser->current.end
19228 },
19229 NULL
19230 ));
19231
19232 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19233
19234 // Skip past the character literal here, since now we have handled
19235 // parser->explicit_encoding correctly.
19236 parser_lex(parser);
19237
19238 // Characters can be followed by strings in which case they are
19239 // automatically concatenated.
19240 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19241 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19242 }
19243
19244 return node;
19245 }
19246 case PM_TOKEN_CLASS_VARIABLE: {
19247 parser_lex(parser);
19248 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
19249
19250 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19251 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19252 }
19253
19254 return node;
19255 }
19256 case PM_TOKEN_CONSTANT: {
19257 parser_lex(parser);
19258 pm_token_t constant = parser->previous;
19259
19260 // If a constant is immediately followed by parentheses, then this is in
19261 // fact a method call, not a constant read.
19262 if (
19263 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
19264 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19265 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19266 match1(parser, PM_TOKEN_BRACE_LEFT)
19267 ) {
19268 pm_arguments_t arguments = { 0 };
19269 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19270 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
19271 }
19272
19273 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
19274
19275 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19276 // If we get here, then we have a comma immediately following a
19277 // constant, so we're going to parse this as a multiple assignment.
19278 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19279 }
19280
19281 return node;
19282 }
19283 case PM_TOKEN_UCOLON_COLON: {
19284 parser_lex(parser);
19285 pm_token_t delimiter = parser->previous;
19286
19287 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19288 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
19289
19290 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19291 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19292 }
19293
19294 return node;
19295 }
19296 case PM_TOKEN_UDOT_DOT:
19297 case PM_TOKEN_UDOT_DOT_DOT: {
19298 pm_token_t operator = parser->current;
19299 parser_lex(parser);
19300
19301 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
19302
19303 // Unary .. and ... are special because these are non-associative
19304 // operators that can also be unary operators. In this case we need
19305 // to explicitly reject code that has a .. or ... that follows this
19306 // expression.
19307 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
19308 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
19309 }
19310
19311 return UP(pm_range_node_create(parser, NULL, &operator, right));
19312 }
19313 case PM_TOKEN_FLOAT:
19314 parser_lex(parser);
19315 return UP(pm_float_node_create(parser, &parser->previous));
19316 case PM_TOKEN_FLOAT_IMAGINARY:
19317 parser_lex(parser);
19318 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
19319 case PM_TOKEN_FLOAT_RATIONAL:
19320 parser_lex(parser);
19321 return UP(pm_float_node_rational_create(parser, &parser->previous));
19322 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
19323 parser_lex(parser);
19324 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
19325 case PM_TOKEN_NUMBERED_REFERENCE: {
19326 parser_lex(parser);
19327 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
19328
19329 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19330 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19331 }
19332
19333 return node;
19334 }
19335 case PM_TOKEN_GLOBAL_VARIABLE: {
19336 parser_lex(parser);
19337 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
19338
19339 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19340 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19341 }
19342
19343 return node;
19344 }
19345 case PM_TOKEN_BACK_REFERENCE: {
19346 parser_lex(parser);
19347 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
19348
19349 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19350 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19351 }
19352
19353 return node;
19354 }
19355 case PM_TOKEN_IDENTIFIER:
19356 case PM_TOKEN_METHOD_NAME: {
19357 parser_lex(parser);
19358 pm_token_t identifier = parser->previous;
19359 pm_node_t *node = parse_variable_call(parser);
19360
19361 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
19362 // If parse_variable_call returned with a call node, then we
19363 // know the identifier is not in the local table. In that case
19364 // we need to check if there are arguments following the
19365 // identifier.
19366 pm_call_node_t *call = (pm_call_node_t *) node;
19367 pm_arguments_t arguments = { 0 };
19368
19369 if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
19370 // Since we found arguments, we need to turn off the
19371 // variable call bit in the flags.
19372 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
19373
19374 call->opening_loc = arguments.opening_loc;
19375 call->arguments = arguments.arguments;
19376 call->closing_loc = arguments.closing_loc;
19377 call->block = arguments.block;
19378
19379 const pm_location_t *end = pm_arguments_end(&arguments);
19380 if (end == NULL) {
19381 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
19382 } else {
19383 PM_NODE_LENGTH_SET_LOCATION(call, end);
19384 }
19385 }
19386 } else {
19387 // Otherwise, we know the identifier is in the local table. This
19388 // can still be a method call if it is followed by arguments or
19389 // a block, so we need to check for that here.
19390 if (
19391 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19392 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19393 match1(parser, PM_TOKEN_BRACE_LEFT)
19394 ) {
19395 pm_arguments_t arguments = { 0 };
19396 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19397 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
19398
19399 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
19400 // If we're about to convert an 'it' implicit local
19401 // variable read into a method call, we need to remove
19402 // it from the list of implicit local variables.
19403 pm_node_unreference(parser, node);
19404 } else {
19405 // Otherwise, we're about to convert a regular local
19406 // variable read into a method call, in which case we
19407 // need to indicate that this was not a read for the
19408 // purposes of warnings.
19409 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
19410
19411 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
19412 pm_node_unreference(parser, node);
19413 } else {
19415 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
19416 }
19417 }
19418
19419 return UP(fcall);
19420 }
19421 }
19422
19423 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19424 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19425 }
19426
19427 return node;
19428 }
19429 case PM_TOKEN_HEREDOC_START: {
19430 // Here we have found a heredoc. We'll parse it and add it to the
19431 // list of strings.
19432 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
19433 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
19434
19435 size_t common_whitespace = (size_t) -1;
19436 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
19437
19438 parser_lex(parser);
19439 pm_token_t opening = parser->previous;
19440
19441 pm_node_t *node;
19442 pm_node_t *part;
19443
19444 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19445 // If we get here, then we have an empty heredoc. We'll create
19446 // an empty content token and return an empty string node.
19447 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19448 pm_token_t content = parse_strings_empty_content(parser->previous.start);
19449
19450 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19451 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19452 } else {
19453 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19454 }
19455
19456 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
19457 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
19458 // If we get here, then we tried to find something in the
19459 // heredoc but couldn't actually parse anything, so we'll just
19460 // return an error recovery node.
19461 //
19462 // parse_string_part handles its own errors, so there is no need
19463 // for us to add one here.
19464 node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19465 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19466 // If we get here, then the part that we parsed was plain string
19467 // content and we're at the end of the heredoc, so we can return
19468 // just a string node with the heredoc opening and closing as
19469 // its opening and closing.
19470 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19471 pm_string_node_t *cast = (pm_string_node_t *) part;
19472
19473 cast->opening_loc = TOK2LOC(parser, &opening);
19474 cast->closing_loc = TOK2LOC(parser, &parser->current);
19475 cast->base.location = cast->opening_loc;
19476
19477 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19478 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
19479 cast->base.type = PM_X_STRING_NODE;
19480 }
19481
19482 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19483 parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
19484 }
19485
19486 node = UP(cast);
19487 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19488 } else {
19489 // If we get here, then we have multiple parts in the heredoc,
19490 // so we'll need to create an interpolated string node to hold
19491 // them all.
19492 pm_node_list_t parts = { 0 };
19493 pm_node_list_append(parser->arena, &parts, part);
19494
19495 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19496 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19497 pm_node_list_append(parser->arena, &parts, part);
19498 }
19499 }
19500
19501 // Now that we have all of the parts, create the correct type of
19502 // interpolated node.
19503 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19504 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19505 cast->parts = parts;
19506
19507 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19508 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
19509
19510 cast->base.location = cast->opening_loc;
19511 node = UP(cast);
19512 } else {
19513 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
19514
19515 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19516 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
19517
19518 cast->base.location = cast->opening_loc;
19519 node = UP(cast);
19520 }
19521
19522 // If this is a heredoc that is indented with a ~, then we need
19523 // to dedent each line by the common leading whitespace.
19524 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19525 pm_node_list_t *nodes;
19526 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19527 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
19528 } else {
19529 nodes = &((pm_interpolated_string_node_t *) node)->parts;
19530 }
19531
19532 parse_heredoc_dedent(parser, nodes, common_whitespace);
19533 }
19534 }
19535
19536 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19537 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19538 }
19539
19540 return node;
19541 }
19542 case PM_TOKEN_INSTANCE_VARIABLE: {
19543 parser_lex(parser);
19544 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
19545
19546 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19547 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19548 }
19549
19550 return node;
19551 }
19552 case PM_TOKEN_INTEGER: {
19553 pm_node_flags_t base = parser->integer.base;
19554 parser_lex(parser);
19555 return UP(pm_integer_node_create(parser, base, &parser->previous));
19556 }
19557 case PM_TOKEN_INTEGER_IMAGINARY: {
19558 pm_node_flags_t base = parser->integer.base;
19559 parser_lex(parser);
19560 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
19561 }
19562 case PM_TOKEN_INTEGER_RATIONAL: {
19563 pm_node_flags_t base = parser->integer.base;
19564 parser_lex(parser);
19565 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
19566 }
19567 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
19568 pm_node_flags_t base = parser->integer.base;
19569 parser_lex(parser);
19570 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
19571 }
19572 case PM_TOKEN_KEYWORD___ENCODING__:
19573 parser_lex(parser);
19574 return UP(pm_source_encoding_node_create(parser, &parser->previous));
19575 case PM_TOKEN_KEYWORD___FILE__:
19576 parser_lex(parser);
19577 return UP(pm_source_file_node_create(parser, &parser->previous));
19578 case PM_TOKEN_KEYWORD___LINE__:
19579 parser_lex(parser);
19580 return UP(pm_source_line_node_create(parser, &parser->previous));
19581 case PM_TOKEN_KEYWORD_ALIAS: {
19582 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19583 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
19584 }
19585
19586 parser_lex(parser);
19587 pm_token_t keyword = parser->previous;
19588
19589 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
19590 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
19591
19592 switch (PM_NODE_TYPE(new_name)) {
19593 case PM_BACK_REFERENCE_READ_NODE:
19594 case PM_NUMBERED_REFERENCE_READ_NODE:
19595 case PM_GLOBAL_VARIABLE_READ_NODE: {
19596 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
19597 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
19598 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
19599 }
19600 } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19601 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19602 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19603 }
19604
19605 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
19606 }
19607 case PM_SYMBOL_NODE:
19608 case PM_INTERPOLATED_SYMBOL_NODE: {
19609 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19610 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19611 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19612 }
19613 }
19615 default:
19616 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
19617 }
19618 }
19619 case PM_TOKEN_KEYWORD_CASE:
19620 return parse_case(parser, flags, depth);
19621 case PM_TOKEN_KEYWORD_BEGIN: {
19622 size_t opening_newline_index = token_newline_index(parser);
19623 parser_lex(parser);
19624
19625 pm_token_t begin_keyword = parser->previous;
19626 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19627
19628 pm_node_list_t current_block_exits = { 0 };
19629 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19630 pm_statements_node_t *begin_statements = NULL;
19631
19632 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19633 pm_accepts_block_stack_push(parser, true);
19634 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19635 pm_accepts_block_stack_pop(parser);
19636 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19637 }
19638
19639 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19640 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19641 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
19642
19643 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
19644 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
19645 pop_block_exits(parser, previous_block_exits);
19646 return UP(begin_node);
19647 }
19648 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19649 pm_node_list_t current_block_exits = { 0 };
19650 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19651
19652 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19653 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19654 }
19655
19656 parser_lex(parser);
19657 pm_token_t keyword = parser->previous;
19658
19659 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19660 pm_token_t opening = parser->previous;
19661 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19662
19663 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
19664 pm_context_t context = parser->current_context->context;
19665 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19666 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19667 }
19668
19669 flush_block_exits(parser, previous_block_exits);
19670 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19671 }
19672 case PM_TOKEN_KEYWORD_BREAK:
19673 case PM_TOKEN_KEYWORD_NEXT:
19674 case PM_TOKEN_KEYWORD_RETURN: {
19675 parser_lex(parser);
19676
19677 pm_token_t keyword = parser->previous;
19678 pm_arguments_t arguments = { 0 };
19679
19680 if (
19681 token_begins_expression_p(parser->current.type) ||
19682 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19683 ) {
19684 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19685
19686 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19687 pm_token_t next = parser->current;
19688 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
19689
19690 // Reject `foo && return bar`.
19691 if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
19692 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
19693 }
19694 }
19695
19696 // It's possible that we've parsed a block argument through our
19697 // call to parse_arguments. If we found one, we should mark it
19698 // as invalid and destroy it, as we don't have a place for it.
19699 if (arguments.block != NULL) {
19700 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19701 pm_node_unreference(parser, arguments.block);
19702 arguments.block = NULL;
19703 }
19704 }
19705
19706 switch (keyword.type) {
19707 case PM_TOKEN_KEYWORD_BREAK: {
19708 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
19709 if (!parser->partial_script) parse_block_exit(parser, node);
19710 return node;
19711 }
19712 case PM_TOKEN_KEYWORD_NEXT: {
19713 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
19714 if (!parser->partial_script) parse_block_exit(parser, node);
19715 return node;
19716 }
19717 case PM_TOKEN_KEYWORD_RETURN: {
19718 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
19719 parse_return(parser, node);
19720 return node;
19721 }
19722 default:
19723 assert(false && "unreachable");
19724 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19725 }
19726 }
19727 case PM_TOKEN_KEYWORD_SUPER: {
19728 parser_lex(parser);
19729
19730 pm_token_t keyword = parser->previous;
19731 pm_arguments_t arguments = { 0 };
19732 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19733
19734 if (
19735 arguments.opening_loc.length == 0 &&
19736 arguments.arguments == NULL &&
19737 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19738 ) {
19739 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
19740 }
19741
19742 return UP(pm_super_node_create(parser, &keyword, &arguments));
19743 }
19744 case PM_TOKEN_KEYWORD_YIELD: {
19745 parser_lex(parser);
19746
19747 pm_token_t keyword = parser->previous;
19748 pm_arguments_t arguments = { 0 };
19749 parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
19750
19751 // It's possible that we've parsed a block argument through our
19752 // call to parse_arguments_list. If we found one, we should mark it
19753 // as invalid and destroy it, as we don't have a place for it on the
19754 // yield node.
19755 if (arguments.block != NULL) {
19756 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19757 pm_node_unreference(parser, arguments.block);
19758 arguments.block = NULL;
19759 }
19760
19761 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
19762 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19763
19764 return node;
19765 }
19766 case PM_TOKEN_KEYWORD_CLASS:
19767 return parse_class(parser, flags, depth);
19768 case PM_TOKEN_KEYWORD_DEF:
19769 return parse_def(parser, binding_power, flags, depth);
19770 case PM_TOKEN_KEYWORD_DEFINED: {
19771 parser_lex(parser);
19772
19773 pm_token_t keyword = parser->previous;
19774 pm_token_t lparen = { 0 };
19775 pm_token_t rparen = { 0 };
19776 pm_node_t *expression;
19777
19778 context_push(parser, PM_CONTEXT_DEFINED);
19779 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19780
19781 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19782 lparen = parser->previous;
19783
19784 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19785 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19786 lparen = (pm_token_t) { 0 };
19787 } else {
19788 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19789
19790 if (!parser->recovering) {
19791 accept1(parser, PM_TOKEN_NEWLINE);
19792 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19793 rparen = parser->previous;
19794 }
19795 }
19796 } else {
19797 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19798 }
19799
19800 context_pop(parser);
19801 return UP(pm_defined_node_create(
19802 parser,
19803 NTOK2PTR(lparen),
19804 expression,
19805 NTOK2PTR(rparen),
19806 &keyword
19807 ));
19808 }
19809 case PM_TOKEN_KEYWORD_END_UPCASE: {
19810 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19811 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19812 }
19813
19814 parser_lex(parser);
19815 pm_token_t keyword = parser->previous;
19816
19817 if (context_def_p(parser)) {
19818 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19819 }
19820
19821 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19822 pm_token_t opening = parser->previous;
19823 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19824
19825 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19826 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19827 }
19828 case PM_TOKEN_KEYWORD_FALSE:
19829 parser_lex(parser);
19830 return UP(pm_false_node_create(parser, &parser->previous));
19831 case PM_TOKEN_KEYWORD_FOR: {
19832 size_t opening_newline_index = token_newline_index(parser);
19833 parser_lex(parser);
19834
19835 pm_token_t for_keyword = parser->previous;
19836 pm_node_t *index;
19837
19838 context_push(parser, PM_CONTEXT_FOR_INDEX);
19839
19840 // First, parse out the first index expression.
19841 if (accept1(parser, PM_TOKEN_USTAR)) {
19842 pm_token_t star_operator = parser->previous;
19843 pm_node_t *name = NULL;
19844
19845 if (token_begins_expression_p(parser->current.type)) {
19846 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19847 }
19848
19849 index = UP(pm_splat_node_create(parser, &star_operator, name));
19850 } else if (token_begins_expression_p(parser->current.type)) {
19851 index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19852 } else {
19853 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19854 index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19855 }
19856
19857 // Now, if there are multiple index expressions, parse them out.
19858 if (match1(parser, PM_TOKEN_COMMA)) {
19859 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19860 } else {
19861 index = parse_target(parser, index, false, false);
19862 }
19863
19864 context_pop(parser);
19865 pm_do_loop_stack_push(parser, true);
19866
19867 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19868 pm_token_t in_keyword = parser->previous;
19869
19870 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19871 pm_do_loop_stack_pop(parser);
19872
19873 pm_token_t do_keyword = { 0 };
19874 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19875 do_keyword = parser->previous;
19876 } else {
19877 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19878 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
19879 }
19880 }
19881
19882 pm_statements_node_t *statements = NULL;
19883 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19884 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19885 }
19886
19887 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19888 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19889
19890 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19891 }
19892 case PM_TOKEN_KEYWORD_IF:
19893 if (parser_end_of_line_p(parser)) {
19894 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19895 }
19896
19897 size_t opening_newline_index = token_newline_index(parser);
19898 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19899 parser_lex(parser);
19900
19901 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19902 case PM_TOKEN_KEYWORD_UNDEF: {
19903 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19904 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19905 }
19906
19907 parser_lex(parser);
19908 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19909 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19910
19911 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19912 } else {
19913 pm_undef_node_append(parser->arena, undef, name);
19914
19915 while (match1(parser, PM_TOKEN_COMMA)) {
19916 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19917 parser_lex(parser);
19918 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19919
19920 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19921 break;
19922 }
19923
19924 pm_undef_node_append(parser->arena, undef, name);
19925 }
19926 }
19927
19928 return UP(undef);
19929 }
19930 case PM_TOKEN_KEYWORD_NOT: {
19931 parser_lex(parser);
19932
19933 pm_token_t message = parser->previous;
19934 pm_arguments_t arguments = { 0 };
19935 pm_node_t *receiver = NULL;
19936
19937 // The `not` keyword without parentheses is only valid in contexts
19938 // where it would be parsed as an expression (i.e., at or below
19939 // the `not` binding power level). In other contexts (e.g., method
19940 // arguments, array elements, assignment right-hand sides),
19941 // parentheses are required: `not(x)`. An exception is made for
19942 // endless def bodies, where `not` is valid as both `arg` and
19943 // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
19944 if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19945 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19946 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19947 } else {
19948 accept1(parser, PM_TOKEN_NEWLINE);
19949 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19950 }
19951
19952 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19953 }
19954
19955 accept1(parser, PM_TOKEN_NEWLINE);
19956
19957 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19958 pm_token_t lparen = parser->previous;
19959
19960 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19961 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19962 } else {
19963 arguments.opening_loc = TOK2LOC(parser, &lparen);
19964 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19965
19966 if (!parser->recovering) {
19967 accept1(parser, PM_TOKEN_NEWLINE);
19968 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19969 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19970 }
19971 }
19972 } else {
19973 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19974 }
19975
19976 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19977 }
19978 case PM_TOKEN_KEYWORD_UNLESS: {
19979 size_t opening_newline_index = token_newline_index(parser);
19980 parser_lex(parser);
19981
19982 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19983 }
19984 case PM_TOKEN_KEYWORD_MODULE:
19985 return parse_module(parser, flags, depth);
19986 case PM_TOKEN_KEYWORD_NIL:
19987 parser_lex(parser);
19988 return UP(pm_nil_node_create(parser, &parser->previous));
19989 case PM_TOKEN_KEYWORD_REDO: {
19990 parser_lex(parser);
19991
19992 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19993 if (!parser->partial_script) parse_block_exit(parser, node);
19994
19995 return node;
19996 }
19997 case PM_TOKEN_KEYWORD_RETRY: {
19998 parser_lex(parser);
19999
20000 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
20001 parse_retry(parser, node);
20002
20003 return node;
20004 }
20005 case PM_TOKEN_KEYWORD_SELF:
20006 parser_lex(parser);
20007 return UP(pm_self_node_create(parser, &parser->previous));
20008 case PM_TOKEN_KEYWORD_TRUE:
20009 parser_lex(parser);
20010 return UP(pm_true_node_create(parser, &parser->previous));
20011 case PM_TOKEN_KEYWORD_UNTIL: {
20012 size_t opening_newline_index = token_newline_index(parser);
20013
20014 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20015 pm_do_loop_stack_push(parser, true);
20016
20017 parser_lex(parser);
20018 pm_token_t keyword = parser->previous;
20019 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20020
20021 pm_do_loop_stack_pop(parser);
20022 context_pop(parser);
20023
20024 pm_token_t do_keyword = { 0 };
20025 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20026 do_keyword = parser->previous;
20027 } else {
20028 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20029 }
20030
20031 pm_statements_node_t *statements = NULL;
20032 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20033 pm_accepts_block_stack_push(parser, true);
20034 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20035 pm_accepts_block_stack_pop(parser);
20036 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20037 }
20038
20039 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20040 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
20041
20042 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20043 }
20044 case PM_TOKEN_KEYWORD_WHILE: {
20045 size_t opening_newline_index = token_newline_index(parser);
20046
20047 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20048 pm_do_loop_stack_push(parser, true);
20049
20050 parser_lex(parser);
20051 pm_token_t keyword = parser->previous;
20052 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20053
20054 pm_do_loop_stack_pop(parser);
20055 context_pop(parser);
20056
20057 pm_token_t do_keyword = { 0 };
20058 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20059 do_keyword = parser->previous;
20060 } else {
20061 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20062 }
20063
20064 pm_statements_node_t *statements = NULL;
20065 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20066 pm_accepts_block_stack_push(parser, true);
20067 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20068 pm_accepts_block_stack_pop(parser);
20069 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20070 }
20071
20072 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20073 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
20074
20075 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20076 }
20077 case PM_TOKEN_PERCENT_LOWER_I: {
20078 parser_lex(parser);
20079 pm_token_t opening = parser->previous;
20080 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20081 pm_node_t *current = NULL;
20082
20083 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20084 accept1(parser, PM_TOKEN_WORDS_SEP);
20085 if (match1(parser, PM_TOKEN_STRING_END)) break;
20086
20087 // Interpolation is not possible but nested heredocs can still lead to
20088 // consecutive (disjoint) string tokens when the final newline is escaped.
20089 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20090 // Record the string node, moving to interpolation if needed.
20091 if (current == NULL) {
20092 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
20093 parser_lex(parser);
20094 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20095 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20096 parser_lex(parser);
20097 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
20098 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20099 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20100 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
20101 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
20102 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
20103 parser_lex(parser);
20104
20105 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
20106 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
20107 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
20108
20109 // current is arena-allocated so no explicit free is needed.
20110 current = UP(interpolated);
20111 } else {
20112 assert(false && "unreachable");
20113 }
20114 }
20115
20116 if (current) {
20117 pm_array_node_elements_append(parser->arena, array, current);
20118 current = NULL;
20119 } else {
20120 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20121 }
20122 }
20123
20124 pm_token_t closing = parser->current;
20125 if (match1(parser, PM_TOKEN_EOF)) {
20126 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20127 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20128 } else {
20129 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20130 }
20131 pm_array_node_close_set(parser, array, &closing);
20132
20133 return UP(array);
20134 }
20135 case PM_TOKEN_PERCENT_UPPER_I:
20136 return parse_symbol_array(parser, depth);
20137 case PM_TOKEN_PERCENT_LOWER_W: {
20138 parser_lex(parser);
20139 pm_token_t opening = parser->previous;
20140 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20141 pm_node_t *current = NULL;
20142
20143 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20144 accept1(parser, PM_TOKEN_WORDS_SEP);
20145 if (match1(parser, PM_TOKEN_STRING_END)) break;
20146
20147 // Interpolation is not possible but nested heredocs can still lead to
20148 // consecutive (disjoint) string tokens when the final newline is escaped.
20149 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20150 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20151
20152 // Record the string node, moving to interpolation if needed.
20153 if (current == NULL) {
20154 current = string;
20155 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20156 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
20157 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20158 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
20159 pm_interpolated_string_node_append(parser, interpolated, current);
20160 pm_interpolated_string_node_append(parser, interpolated, string);
20161 current = UP(interpolated);
20162 } else {
20163 assert(false && "unreachable");
20164 }
20165 parser_lex(parser);
20166 }
20167
20168 if (current) {
20169 pm_array_node_elements_append(parser->arena, array, current);
20170 current = NULL;
20171 } else {
20172 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20173 }
20174 }
20175
20176 pm_token_t closing = parser->current;
20177 if (match1(parser, PM_TOKEN_EOF)) {
20178 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20179 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20180 } else {
20181 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20182 }
20183
20184 pm_array_node_close_set(parser, array, &closing);
20185 return UP(array);
20186 }
20187 case PM_TOKEN_PERCENT_UPPER_W:
20188 return parse_string_array(parser, depth);
20189 case PM_TOKEN_REGEXP_BEGIN: {
20190 pm_token_t opening = parser->current;
20191 parser_lex(parser);
20192
20193 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20194 // If we get here, then we have an end immediately after a start. In
20195 // that case we'll create an empty content token and return an
20196 // uninterpolated regular expression.
20197 pm_token_t content = (pm_token_t) {
20198 .type = PM_TOKEN_STRING_CONTENT,
20199 .start = parser->previous.end,
20200 .end = parser->previous.end
20201 };
20202
20203 parser_lex(parser);
20204
20205 pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20206 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20207 return UP(node);
20208 }
20209
20211
20212 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20213 // In this case we've hit string content so we know the regular
20214 // expression at least has something in it. We'll need to check if the
20215 // following token is the end (in which case we can return a plain
20216 // regular expression) or if it's not then it has interpolation.
20217 pm_string_t unescaped = parser->current_string;
20218 pm_token_t content = parser->current;
20219 parser_lex(parser);
20220
20221 // If we hit an end, then we can create a regular expression
20222 // node without interpolation, which can be represented more
20223 // succinctly and more easily compiled.
20224 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20225 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20226
20227 // If we're not immediately followed by a =~, then we
20228 // parse and validate now. If it is followed by a =~,
20229 // then it will get parsed in the =~ handler where
20230 // named captures can also be extracted.
20231 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20232 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20233 }
20234
20235 return UP(node);
20236 }
20237
20238 // If we get here, then we have interpolation so we'll need to create
20239 // a regular expression node with interpolation.
20240 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20241
20242 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20243 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20244 // This is extremely strange, but the first string part of a
20245 // regular expression will always be tagged as binary if we
20246 // are in a US-ASCII file, no matter its contents.
20247 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20248 }
20249
20250 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20251 } else {
20252 // If the first part of the body of the regular expression is not a
20253 // string content, then we have interpolation and we need to create an
20254 // interpolated regular expression node.
20255 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20256 }
20257
20258 // Now that we're here and we have interpolation, we'll parse all of the
20259 // parts into the list.
20260 pm_node_t *part;
20261 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20262 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20263 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20264 }
20265 }
20266
20267 pm_token_t closing = parser->current;
20268 if (match1(parser, PM_TOKEN_EOF)) {
20269 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20270 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20271 } else {
20272 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20273 }
20274
20275 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20276 return UP(interpolated);
20277 }
20278 case PM_TOKEN_BACKTICK:
20279 case PM_TOKEN_PERCENT_LOWER_X: {
20280 parser_lex(parser);
20281 pm_token_t opening = parser->previous;
20282
20283 // When we get here, we don't know if this string is going to have
20284 // interpolation or not, even though it is allowed. Still, we want to be
20285 // able to return a string node without interpolation if we can since
20286 // it'll be faster.
20287 if (match1(parser, PM_TOKEN_STRING_END)) {
20288 // If we get here, then we have an end immediately after a start. In
20289 // that case we'll create an empty content token and return an
20290 // uninterpolated string.
20291 pm_token_t content = (pm_token_t) {
20292 .type = PM_TOKEN_STRING_CONTENT,
20293 .start = parser->previous.end,
20294 .end = parser->previous.end
20295 };
20296
20297 parser_lex(parser);
20298 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
20299 }
20300
20302
20303 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20304 // In this case we've hit string content so we know the string
20305 // at least has something in it. We'll need to check if the
20306 // following token is the end (in which case we can return a
20307 // plain string) or if it's not then it has interpolation.
20308 pm_string_t unescaped = parser->current_string;
20309 pm_token_t content = parser->current;
20310 parser_lex(parser);
20311
20312 if (match1(parser, PM_TOKEN_STRING_END)) {
20313 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
20314 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20315 parser_lex(parser);
20316 return node;
20317 }
20318
20319 // If we get here, then we have interpolation so we'll need to
20320 // create a string node with interpolation.
20321 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20322
20323 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20324 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20325
20326 pm_interpolated_xstring_node_append(parser->arena, node, part);
20327 } else {
20328 // If the first part of the body of the string is not a string
20329 // content, then we have interpolation and we need to create an
20330 // interpolated string node.
20331 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20332 }
20333
20334 pm_node_t *part;
20335 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20336 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20337 pm_interpolated_xstring_node_append(parser->arena, node, part);
20338 }
20339 }
20340
20341 pm_token_t closing = parser->current;
20342 if (match1(parser, PM_TOKEN_EOF)) {
20343 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20344 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20345 } else {
20346 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20347 }
20348 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
20349
20350 return UP(node);
20351 }
20352 case PM_TOKEN_USTAR: {
20353 parser_lex(parser);
20354
20355 // * operators at the beginning of expressions are only valid in the
20356 // context of a multiple assignment. We enforce that here. We'll
20357 // still lex past it though and create a missing node place.
20358 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20359 pm_parser_err_prefix(parser, diag_id);
20360 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20361 }
20362
20363 pm_token_t operator = parser->previous;
20364 pm_node_t *name = NULL;
20365
20366 if (token_begins_expression_p(parser->current.type)) {
20367 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20368 }
20369
20370 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
20371
20372 if (match1(parser, PM_TOKEN_COMMA)) {
20373 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20374 } else {
20375 return parse_target_validate(parser, splat, true);
20376 }
20377 }
20378 case PM_TOKEN_BANG: {
20379 if (binding_power > PM_BINDING_POWER_UNARY) {
20380 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20381 }
20382
20383 parser_lex(parser);
20384
20385 pm_token_t operator = parser->previous;
20386 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20387 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20388
20389 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20390 return UP(node);
20391 }
20392 case PM_TOKEN_TILDE: {
20393 if (binding_power > PM_BINDING_POWER_UNARY) {
20394 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20395 }
20396 parser_lex(parser);
20397
20398 pm_token_t operator = parser->previous;
20399 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20400 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20401
20402 return UP(node);
20403 }
20404 case PM_TOKEN_UMINUS: {
20405 if (binding_power > PM_BINDING_POWER_UNARY) {
20406 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20407 }
20408 parser_lex(parser);
20409
20410 pm_token_t operator = parser->previous;
20411 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20412 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20413
20414 return UP(node);
20415 }
20416 case PM_TOKEN_UMINUS_NUM: {
20417 parser_lex(parser);
20418
20419 pm_token_t operator = parser->previous;
20420 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20421
20422 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20423 pm_token_t exponent_operator = parser->previous;
20424 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20425 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20426 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20427 } else {
20428 switch (PM_NODE_TYPE(node)) {
20429 case PM_INTEGER_NODE:
20430 case PM_FLOAT_NODE:
20431 case PM_RATIONAL_NODE:
20432 case PM_IMAGINARY_NODE:
20433 parse_negative_numeric(node);
20434 break;
20435 default:
20436 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20437 break;
20438 }
20439 }
20440
20441 return node;
20442 }
20443 case PM_TOKEN_MINUS_GREATER: {
20444 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20445 parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20446
20447 size_t opening_newline_index = token_newline_index(parser);
20448 pm_accepts_block_stack_push(parser, true);
20449 parser_lex(parser);
20450
20451 pm_token_t operator = parser->previous;
20452 pm_parser_scope_push(parser, false);
20453
20454 pm_block_parameters_node_t *block_parameters;
20455
20456 switch (parser->current.type) {
20457 case PM_TOKEN_PARENTHESIS_LEFT: {
20458 pm_token_t opening = parser->current;
20459 parser_lex(parser);
20460
20461 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20462 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20463 } else {
20464 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20465 }
20466
20467 accept1(parser, PM_TOKEN_NEWLINE);
20468 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20469
20470 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20471 break;
20472 }
20473 case PM_CASE_PARAMETER: {
20474 pm_accepts_block_stack_push(parser, false);
20475 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20476 pm_accepts_block_stack_pop(parser);
20477 break;
20478 }
20479 default: {
20480 block_parameters = NULL;
20481 break;
20482 }
20483 }
20484
20485 pm_token_t opening;
20486 pm_node_t *body = NULL;
20487 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20488
20489 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20490 opening = parser->previous;
20491
20492 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20493 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20494 }
20495
20496 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20497 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20498 } else {
20499 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20500 opening = parser->previous;
20501
20502 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20503 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20504 }
20505
20506 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20507 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20508 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20509 } else {
20510 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20511 }
20512
20513 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20514 }
20515
20516 pm_constant_id_list_t locals;
20517 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20518 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20519
20520 pm_parser_scope_pop(parser);
20521 pm_accepts_block_stack_pop(parser);
20522
20523 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20524 }
20525 case PM_TOKEN_UPLUS: {
20526 if (binding_power > PM_BINDING_POWER_UNARY) {
20527 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20528 }
20529 parser_lex(parser);
20530
20531 pm_token_t operator = parser->previous;
20532 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20533 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20534
20535 return UP(node);
20536 }
20537 case PM_TOKEN_STRING_BEGIN:
20538 return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
20539 case PM_TOKEN_SYMBOL_BEGIN: {
20540 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20541 parser_lex(parser);
20542
20543 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20544 }
20545 default: {
20546 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20547
20548 if (recoverable != PM_CONTEXT_NONE) {
20549 parser->recovering = true;
20550
20551 // If the given error is not the generic one, then we'll add it
20552 // here because it will provide more context in addition to the
20553 // recoverable error that we will also add.
20554 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20555 pm_parser_err_prefix(parser, diag_id);
20556 }
20557
20558 // If we get here, then we are assuming this token is closing a
20559 // parent context, so we'll indicate that to the user so that
20560 // they know how we behaved.
20561 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
20562 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20563 // We're going to make a special case here, because "cannot
20564 // parse expression" is pretty generic, and we know here that we
20565 // have an unexpected token.
20566 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
20567 } else {
20568 pm_parser_err_prefix(parser, diag_id);
20569 }
20570
20571 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20572 }
20573 }
20574}
20575
20585static pm_node_t *
20586parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20587 pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20588
20589 // Assignments whose value is a command call (e.g., a = b c) can only
20590 // be followed by modifiers (if/unless/while/until/rescue) and not by
20591 // operators with higher binding power. If we find one, emit an error
20592 // and skip the operator and its right-hand side.
20593 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20594 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20595 parser_lex(parser);
20596 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20597 }
20598
20599 // Contradicting binding powers, the right-hand-side value of the assignment
20600 // allows the `rescue` modifier.
20601 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20602 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20603
20604 pm_token_t rescue = parser->current;
20605 parser_lex(parser);
20606
20607 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20608 context_pop(parser);
20609
20610 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20611 }
20612
20613 return value;
20614}
20615
20620static void
20621parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20622 switch (PM_NODE_TYPE(node)) {
20623 case PM_BEGIN_NODE: {
20624 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20625 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20626 break;
20627 }
20628 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20630 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20631 break;
20632 }
20633 case PM_PARENTHESES_NODE: {
20634 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20635 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20636 break;
20637 }
20638 case PM_STATEMENTS_NODE: {
20639 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20640 const pm_node_t *statement;
20641
20642 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20643 parse_assignment_value_local(parser, statement);
20644 }
20645 break;
20646 }
20647 default:
20648 break;
20649 }
20650}
20651
20664static pm_node_t *
20665parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20666 bool permitted = true;
20667 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20668
20669 pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20670 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20671
20672 parse_assignment_value_local(parser, value);
20673 bool single_value = true;
20674
20675 // Block calls (command call + do block, e.g., `foo bar do end`) cannot
20676 // be followed by a comma to form a multi-value RHS because each element
20677 // of a multi-value assignment must be an `arg`, not a `block_call`.
20678 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20679 single_value = false;
20680
20681 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20682 pm_array_node_elements_append(parser->arena, array, value);
20683 value = UP(array);
20684
20685 while (accept1(parser, PM_TOKEN_COMMA)) {
20686 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20687
20688 pm_array_node_elements_append(parser->arena, array, element);
20689 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
20690
20691 parse_assignment_value_local(parser, element);
20692 }
20693 }
20694
20695 // Assignments whose value is a command call (e.g., a = b c) can only
20696 // be followed by modifiers (if/unless/while/until/rescue) and not by
20697 // operators with higher binding power. If we find one, emit an error
20698 // and skip the operator and its right-hand side.
20699 if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20700 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20701 parser_lex(parser);
20702 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20703 }
20704
20705 // Contradicting binding powers, the right-hand-side value of the assignment
20706 // allows the `rescue` modifier.
20707 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20708 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20709
20710 pm_token_t rescue = parser->current;
20711 parser_lex(parser);
20712
20713 bool accepts_command_call_inner = false;
20714
20715 // RHS can accept command call iff the value is a call with arguments
20716 // but without parenthesis.
20717 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20718 pm_call_node_t *call_node = (pm_call_node_t *) value;
20719 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20720 accepts_command_call_inner = true;
20721 }
20722 }
20723
20724 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20725 context_pop(parser);
20726
20727 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20728 }
20729
20730 return value;
20731}
20732
20740static void
20741parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20742 if (call_node->arguments != NULL) {
20743 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20744 pm_node_unreference(parser, UP(call_node->arguments));
20745 call_node->arguments = NULL;
20746 }
20747
20748 if (call_node->block != NULL) {
20749 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20750 pm_node_unreference(parser, UP(call_node->block));
20751 call_node->block = NULL;
20752 }
20753}
20754
20755static PRISM_INLINE const uint8_t *
20756pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20757 cursor++;
20758
20759 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20760 uint8_t value = escape_hexadecimal_digit(*cursor);
20761 cursor++;
20762
20763 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20764 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20765 cursor++;
20766 }
20767
20768 pm_buffer_append_byte(unescaped, value);
20769 } else {
20770 pm_buffer_append_string(unescaped, "\\x", 2);
20771 }
20772
20773 return cursor;
20774}
20775
20776static PRISM_INLINE const uint8_t *
20777pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20778 uint8_t value = (uint8_t) (*cursor - '0');
20779 cursor++;
20780
20781 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20782 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20783 cursor++;
20784
20785 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20786 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20787 cursor++;
20788 }
20789 }
20790
20791 pm_buffer_append_byte(unescaped, value);
20792 return cursor;
20793}
20794
20795static PRISM_INLINE const uint8_t *
20796pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20797 const uint8_t *start = cursor - 1;
20798 cursor++;
20799
20800 if (cursor >= end) {
20801 pm_buffer_append_string(unescaped, "\\u", 2);
20802 return cursor;
20803 }
20804
20805 if (*cursor != '{') {
20806 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20807 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20808
20809 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20810 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20811 }
20812
20813 return cursor + length;
20814 }
20815
20816 cursor++;
20817 for (;;) {
20818 while (cursor < end && *cursor == ' ') cursor++;
20819
20820 if (cursor >= end) break;
20821 if (*cursor == '}') {
20822 cursor++;
20823 break;
20824 }
20825
20826 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20827 if (length == 0) {
20828 break;
20829 }
20830 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20831
20832 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20833 cursor += length;
20834 }
20835
20836 return cursor;
20837}
20838
20839static void
20840pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20841 const uint8_t *end = source + length;
20842 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20843
20844 for (;;) {
20845 if (++cursor >= end) {
20846 pm_buffer_append_byte(unescaped, '\\');
20847 return;
20848 }
20849
20850 switch (*cursor) {
20851 case 'x':
20852 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20853 break;
20854 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20855 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20856 break;
20857 case 'u':
20858 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20859 break;
20860 default:
20861 pm_buffer_append_byte(unescaped, '\\');
20862 break;
20863 }
20864
20865 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20866 if (next_cursor == NULL) break;
20867
20868 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20869 cursor = next_cursor;
20870 }
20871
20872 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20873}
20874
20879static void
20880parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
20881 pm_call_node_t *call = callback_data->call;
20882 pm_constant_id_list_t *names = &callback_data->names;
20883
20884 const uint8_t *source = pm_string_source(capture);
20885 size_t length = pm_string_length(capture);
20886 pm_buffer_t unescaped = { 0 };
20887
20888 // First, we need to handle escapes within the name of the capture group.
20889 // This is because regular expressions have three different representations
20890 // in prism. The first is the plain source code. The second is the
20891 // representation that will be sent to the regular expression engine, which
20892 // is the value of the "unescaped" field. This is poorly named, because it
20893 // actually still contains escapes, just a subset of them that the regular
20894 // expression engine knows how to handle. The third representation is fully
20895 // unescaped, which is what we need.
20896 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20897 if (PRISM_UNLIKELY(cursor != NULL)) {
20898 pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
20899 source = (const uint8_t *) pm_buffer_value(&unescaped);
20900 length = pm_buffer_length(&unescaped);
20901 }
20902
20903 const uint8_t *start;
20904 const uint8_t *end;
20905 pm_constant_id_t name;
20906
20907 // If the name of the capture group isn't a valid identifier, we do
20908 // not add it to the local table.
20909 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20910 pm_buffer_cleanup(&unescaped);
20911 return;
20912 }
20913
20914 if (shared) {
20915 // If the unescaped string is a slice of the source, then we can
20916 // copy the names directly. The pointers will line up.
20917 start = source;
20918 end = source + length;
20919 name = pm_parser_constant_id_raw(parser, start, end);
20920 } else {
20921 // Otherwise, the name is a slice of the malloc-ed owned string,
20922 // in which case we need to copy it out into a new string.
20923 start = parser->start + PM_NODE_START(call->receiver);
20924 end = parser->start + PM_NODE_END(call->receiver);
20925
20926 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
20927 memcpy(memory, source, length);
20928 name = pm_parser_constant_id_owned(parser, memory, length);
20929 }
20930
20931 // Add this name to the list of constants if it is valid, not duplicated,
20932 // and not a keyword.
20933 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20934 pm_constant_id_list_append(parser->arena, names, name);
20935
20936 int depth;
20937 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20938 // If the local is not already a local but it is a keyword, then we
20939 // do not want to add a capture for this.
20940 if (pm_local_is_keyword((const char *) source, length)) {
20941 pm_buffer_cleanup(&unescaped);
20942 return;
20943 }
20944
20945 // If the identifier is not already a local, then we will add it to
20946 // the local table.
20947 pm_parser_local_add(parser, name, start, end, 0);
20948 }
20949
20950 // Here we lazily create the MatchWriteNode since we know we're
20951 // about to add a target.
20952 if (callback_data->match == NULL) {
20953 callback_data->match = pm_match_write_node_create(parser, call);
20954 }
20955
20956 // Next, create the local variable target and add it to the list of
20957 // targets for the match.
20958 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20959 pm_node_list_append(parser->arena, &callback_data->match->targets, target);
20960 }
20961
20962 pm_buffer_cleanup(&unescaped);
20963}
20964
20970static pm_node_t *
20971parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20972 pm_regexp_name_data_t callback_data = {
20973 .call = call,
20974 .match = NULL,
20975 .names = { 0 },
20976 };
20977
20978 pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
20979
20980 if (callback_data.match != NULL) {
20981 return UP(callback_data.match);
20982 } else {
20983 return UP(call);
20984 }
20985}
20986
20987static PRISM_INLINE pm_node_t *
20988parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
20989 pm_token_t token = parser->current;
20990
20991 switch (token.type) {
20992 case PM_TOKEN_EQUAL: {
20993 switch (PM_NODE_TYPE(node)) {
20994 case PM_CALL_NODE: {
20995 // If we have no arguments to the call node and we need this
20996 // to be a target then this is either a method call or a
20997 // local variable write. This _must_ happen before the value
20998 // is parsed because it could be referenced in the value.
20999 pm_call_node_t *call_node = (pm_call_node_t *) node;
21000 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21001 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
21002 }
21003 }
21005 case PM_CASE_WRITABLE: {
21006 // When we have `it = value`, we need to add `it` as a local
21007 // variable before parsing the value, in case the value
21008 // references the variable.
21009 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21010 pm_parser_local_add_location(parser, &node->location, 0);
21011 }
21012
21013 parser_lex(parser);
21014 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21015
21016 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21017 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21018 }
21019
21020 return parse_write(parser, node, &token, value);
21021 }
21022 case PM_SPLAT_NODE: {
21023 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21024 pm_multi_target_node_targets_append(parser, multi_target, node);
21025
21026 parser_lex(parser);
21027 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21028 return parse_write(parser, UP(multi_target), &token, value);
21029 }
21030 case PM_SOURCE_ENCODING_NODE:
21031 case PM_FALSE_NODE:
21032 case PM_SOURCE_FILE_NODE:
21033 case PM_SOURCE_LINE_NODE:
21034 case PM_NIL_NODE:
21035 case PM_SELF_NODE:
21036 case PM_TRUE_NODE: {
21037 // In these special cases, we have specific error messages
21038 // and we will replace them with local variable writes.
21039 parser_lex(parser);
21040 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21041 return parse_unwriteable_write(parser, node, &token, value);
21042 }
21043 default:
21044 // In this case we have an = sign, but we don't know what
21045 // it's for. We need to treat it as an error. We'll mark it
21046 // as an error and skip past it.
21047 parser_lex(parser);
21048 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21049 return node;
21050 }
21051 }
21052 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21053 switch (PM_NODE_TYPE(node)) {
21054 case PM_BACK_REFERENCE_READ_NODE:
21055 case PM_NUMBERED_REFERENCE_READ_NODE:
21056 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21058 case PM_GLOBAL_VARIABLE_READ_NODE: {
21059 parser_lex(parser);
21060
21061 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21062 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
21063
21064 return result;
21065 }
21066 case PM_CLASS_VARIABLE_READ_NODE: {
21067 parser_lex(parser);
21068
21069 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21070 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21071
21072 return result;
21073 }
21074 case PM_CONSTANT_PATH_NODE: {
21075 parser_lex(parser);
21076
21077 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21078 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21079
21080 return parse_shareable_constant_write(parser, write);
21081 }
21082 case PM_CONSTANT_READ_NODE: {
21083 parser_lex(parser);
21084
21085 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21086 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21087
21088 return parse_shareable_constant_write(parser, write);
21089 }
21090 case PM_INSTANCE_VARIABLE_READ_NODE: {
21091 parser_lex(parser);
21092
21093 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21094 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21095
21096 return result;
21097 }
21098 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21099 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21100 parser_lex(parser);
21101
21102 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21103 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
21104
21105 pm_node_unreference(parser, node);
21106 return result;
21107 }
21108 case PM_LOCAL_VARIABLE_READ_NODE: {
21109 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21110 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
21111 pm_node_unreference(parser, node);
21112 }
21113
21115 parser_lex(parser);
21116
21117 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21118 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21119
21120 return result;
21121 }
21122 case PM_CALL_NODE: {
21123 pm_call_node_t *cast = (pm_call_node_t *) node;
21124
21125 // If we have a vcall (a method with no arguments and no
21126 // receiver that could have been a local variable) then we
21127 // will transform it into a local variable write.
21128 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21129 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21130 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21131 parser_lex(parser);
21132
21133 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21134 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21135
21136 return result;
21137 }
21138
21139 // Move past the token here so that we have already added
21140 // the local variable by this point.
21141 parser_lex(parser);
21142
21143 // If there is no call operator and the message is "[]" then
21144 // this is an aref expression, and we can transform it into
21145 // an aset expression.
21146 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21147 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21148 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
21149 }
21150
21151 // If this node cannot be writable, then we have an error.
21152 if (pm_call_node_writable_p(parser, cast)) {
21153 parse_write_name(parser, &cast->name);
21154 } else {
21155 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21156 }
21157
21158 parse_call_operator_write(parser, cast, &token);
21159 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21160 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
21161 }
21162 case PM_MULTI_WRITE_NODE: {
21163 parser_lex(parser);
21164 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21165 return node;
21166 }
21167 default:
21168 parser_lex(parser);
21169
21170 // In this case we have an &&= sign, but we don't know what it's for.
21171 // We need to treat it as an error. For now, we'll mark it as an error
21172 // and just skip right past it.
21173 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21174 return node;
21175 }
21176 }
21177 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21178 switch (PM_NODE_TYPE(node)) {
21179 case PM_BACK_REFERENCE_READ_NODE:
21180 case PM_NUMBERED_REFERENCE_READ_NODE:
21181 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21183 case PM_GLOBAL_VARIABLE_READ_NODE: {
21184 parser_lex(parser);
21185
21186 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21187 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
21188
21189 return result;
21190 }
21191 case PM_CLASS_VARIABLE_READ_NODE: {
21192 parser_lex(parser);
21193
21194 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21195 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21196
21197 return result;
21198 }
21199 case PM_CONSTANT_PATH_NODE: {
21200 parser_lex(parser);
21201
21202 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21203 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21204
21205 return parse_shareable_constant_write(parser, write);
21206 }
21207 case PM_CONSTANT_READ_NODE: {
21208 parser_lex(parser);
21209
21210 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21211 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21212
21213 return parse_shareable_constant_write(parser, write);
21214 }
21215 case PM_INSTANCE_VARIABLE_READ_NODE: {
21216 parser_lex(parser);
21217
21218 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21219 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21220
21221 return result;
21222 }
21223 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21224 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21225 parser_lex(parser);
21226
21227 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21228 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
21229
21230 pm_node_unreference(parser, node);
21231 return result;
21232 }
21233 case PM_LOCAL_VARIABLE_READ_NODE: {
21234 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21235 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21236 pm_node_unreference(parser, node);
21237 }
21238
21240 parser_lex(parser);
21241
21242 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21243 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21244
21245 return result;
21246 }
21247 case PM_CALL_NODE: {
21248 pm_call_node_t *cast = (pm_call_node_t *) node;
21249
21250 // If we have a vcall (a method with no arguments and no
21251 // receiver that could have been a local variable) then we
21252 // will transform it into a local variable write.
21253 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21254 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21255 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21256 parser_lex(parser);
21257
21258 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21259 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21260
21261 return result;
21262 }
21263
21264 // Move past the token here so that we have already added
21265 // the local variable by this point.
21266 parser_lex(parser);
21267
21268 // If there is no call operator and the message is "[]" then
21269 // this is an aref expression, and we can transform it into
21270 // an aset expression.
21271 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21272 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21273 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
21274 }
21275
21276 // If this node cannot be writable, then we have an error.
21277 if (pm_call_node_writable_p(parser, cast)) {
21278 parse_write_name(parser, &cast->name);
21279 } else {
21280 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21281 }
21282
21283 parse_call_operator_write(parser, cast, &token);
21284 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21285 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
21286 }
21287 case PM_MULTI_WRITE_NODE: {
21288 parser_lex(parser);
21289 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21290 return node;
21291 }
21292 default:
21293 parser_lex(parser);
21294
21295 // In this case we have an ||= sign, but we don't know what it's for.
21296 // We need to treat it as an error. For now, we'll mark it as an error
21297 // and just skip right past it.
21298 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21299 return node;
21300 }
21301 }
21302 case PM_TOKEN_AMPERSAND_EQUAL:
21303 case PM_TOKEN_CARET_EQUAL:
21304 case PM_TOKEN_GREATER_GREATER_EQUAL:
21305 case PM_TOKEN_LESS_LESS_EQUAL:
21306 case PM_TOKEN_MINUS_EQUAL:
21307 case PM_TOKEN_PERCENT_EQUAL:
21308 case PM_TOKEN_PIPE_EQUAL:
21309 case PM_TOKEN_PLUS_EQUAL:
21310 case PM_TOKEN_SLASH_EQUAL:
21311 case PM_TOKEN_STAR_EQUAL:
21312 case PM_TOKEN_STAR_STAR_EQUAL: {
21313 switch (PM_NODE_TYPE(node)) {
21314 case PM_BACK_REFERENCE_READ_NODE:
21315 case PM_NUMBERED_REFERENCE_READ_NODE:
21316 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21318 case PM_GLOBAL_VARIABLE_READ_NODE: {
21319 parser_lex(parser);
21320
21321 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21322 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
21323
21324 return result;
21325 }
21326 case PM_CLASS_VARIABLE_READ_NODE: {
21327 parser_lex(parser);
21328
21329 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21330 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21331
21332 return result;
21333 }
21334 case PM_CONSTANT_PATH_NODE: {
21335 parser_lex(parser);
21336
21337 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21338 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21339
21340 return parse_shareable_constant_write(parser, write);
21341 }
21342 case PM_CONSTANT_READ_NODE: {
21343 parser_lex(parser);
21344
21345 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21346 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21347
21348 return parse_shareable_constant_write(parser, write);
21349 }
21350 case PM_INSTANCE_VARIABLE_READ_NODE: {
21351 parser_lex(parser);
21352
21353 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21354 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21355
21356 return result;
21357 }
21358 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21359 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21360 parser_lex(parser);
21361
21362 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21363 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21364
21365 pm_node_unreference(parser, node);
21366 return result;
21367 }
21368 case PM_LOCAL_VARIABLE_READ_NODE: {
21369 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21370 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21371 pm_node_unreference(parser, node);
21372 }
21373
21375 parser_lex(parser);
21376
21377 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21378 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21379
21380 return result;
21381 }
21382 case PM_CALL_NODE: {
21383 parser_lex(parser);
21384 pm_call_node_t *cast = (pm_call_node_t *) node;
21385
21386 // If we have a vcall (a method with no arguments and no
21387 // receiver that could have been a local variable) then we
21388 // will transform it into a local variable write.
21389 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21390 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21391 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21392 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21393 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21394
21395 return result;
21396 }
21397
21398 // If there is no call operator and the message is "[]" then
21399 // this is an aref expression, and we can transform it into
21400 // an aset expression.
21401 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21402 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21403 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21404 }
21405
21406 // If this node cannot be writable, then we have an error.
21407 if (pm_call_node_writable_p(parser, cast)) {
21408 parse_write_name(parser, &cast->name);
21409 } else {
21410 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21411 }
21412
21413 parse_call_operator_write(parser, cast, &token);
21414 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21415 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21416 }
21417 case PM_MULTI_WRITE_NODE: {
21418 parser_lex(parser);
21419 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21420 return node;
21421 }
21422 default:
21423 parser_lex(parser);
21424
21425 // In this case we have an operator but we don't know what it's for.
21426 // We need to treat it as an error. For now, we'll mark it as an error
21427 // and just skip right past it.
21428 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
21429 return node;
21430 }
21431 }
21432 case PM_TOKEN_AMPERSAND_AMPERSAND:
21433 case PM_TOKEN_KEYWORD_AND: {
21434 parser_lex(parser);
21435
21436 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21437 return UP(pm_and_node_create(parser, node, &token, right));
21438 }
21439 case PM_TOKEN_KEYWORD_OR:
21440 case PM_TOKEN_PIPE_PIPE: {
21441 parser_lex(parser);
21442
21443 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21444 return UP(pm_or_node_create(parser, node, &token, right));
21445 }
21446 case PM_TOKEN_EQUAL_TILDE: {
21447 // Note that we _must_ parse the value before adding the local
21448 // variables in order to properly mirror the behavior of Ruby. For
21449 // example,
21450 //
21451 // /(?<foo>bar)/ =~ foo
21452 //
21453 // In this case, `foo` should be a method call and not a local yet.
21454 parser_lex(parser);
21455 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21456
21457 // By default, we're going to create a call node and then return it.
21458 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21459 pm_node_t *result = UP(call);
21460
21461 // If the receiver of this =~ is a regular expression node, then we
21462 // need to introduce local variables for it based on its named
21463 // capture groups.
21464 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21465 // It's possible to have an interpolated regular expression node
21466 // that only contains strings. This is because it can be split
21467 // up by a heredoc. In this case we need to concat the unescaped
21468 // strings together and then parse them as a regular expression.
21470
21471 bool interpolated = false;
21472 size_t total_length = 0;
21473
21474 pm_node_t *part;
21475 PM_NODE_LIST_FOREACH(parts, index, part) {
21476 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21477 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21478 } else {
21479 interpolated = true;
21480 break;
21481 }
21482 }
21483
21484 if (!interpolated && total_length > 0) {
21485 void *memory = xmalloc(total_length);
21486 if (!memory) abort();
21487
21488 uint8_t *cursor = memory;
21489 PM_NODE_LIST_FOREACH(parts, index, part) {
21490 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21491 size_t length = pm_string_length(unescaped);
21492
21493 memcpy(cursor, pm_string_source(unescaped), length);
21494 cursor += length;
21495 }
21496
21497 pm_string_t owned;
21498 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21499
21500 result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21501 pm_string_cleanup(&owned);
21502 }
21503 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21504 // If we have a regular expression node, then we can parse
21505 // the named captures and validate encoding in one pass.
21507
21508 pm_regexp_name_data_t name_data = {
21509 .call = call,
21510 .match = NULL,
21511 .names = { 0 },
21512 };
21513
21514 pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
21515
21516 if (name_data.match != NULL) {
21517 result = UP(name_data.match);
21518 }
21519 }
21520
21521 return result;
21522 }
21523 case PM_TOKEN_UAMPERSAND:
21524 case PM_TOKEN_USTAR:
21525 case PM_TOKEN_USTAR_STAR:
21526 // The only times this will occur are when we are in an error state,
21527 // but we'll put them in here so that errors can propagate.
21528 case PM_TOKEN_BANG_EQUAL:
21529 case PM_TOKEN_BANG_TILDE:
21530 case PM_TOKEN_EQUAL_EQUAL:
21531 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21532 case PM_TOKEN_LESS_EQUAL_GREATER:
21533 case PM_TOKEN_CARET:
21534 case PM_TOKEN_PIPE:
21535 case PM_TOKEN_AMPERSAND:
21536 case PM_TOKEN_GREATER_GREATER:
21537 case PM_TOKEN_LESS_LESS:
21538 case PM_TOKEN_MINUS:
21539 case PM_TOKEN_PLUS:
21540 case PM_TOKEN_PERCENT:
21541 case PM_TOKEN_SLASH:
21542 case PM_TOKEN_STAR:
21543 case PM_TOKEN_STAR_STAR: {
21544 parser_lex(parser);
21545 pm_token_t operator = parser->previous;
21546 switch (PM_NODE_TYPE(node)) {
21547 case PM_RESCUE_MODIFIER_NODE: {
21549 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21550 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21551 }
21552 break;
21553 }
21554 case PM_AND_NODE: {
21555 pm_and_node_t *cast = (pm_and_node_t *) node;
21556 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21557 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21558 }
21559 break;
21560 }
21561 case PM_OR_NODE: {
21562 pm_or_node_t *cast = (pm_or_node_t *) node;
21563 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21564 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21565 }
21566 break;
21567 }
21568 default:
21569 break;
21570 }
21571
21572 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21573 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21574 }
21575 case PM_TOKEN_GREATER:
21576 case PM_TOKEN_GREATER_EQUAL:
21577 case PM_TOKEN_LESS:
21578 case PM_TOKEN_LESS_EQUAL: {
21579 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21580 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21581 }
21582
21583 parser_lex(parser);
21584 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21585 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21586 }
21587 case PM_TOKEN_AMPERSAND_DOT:
21588 case PM_TOKEN_DOT: {
21589 parser_lex(parser);
21590 pm_token_t operator = parser->previous;
21591 pm_arguments_t arguments = { 0 };
21592
21593 // This if statement handles the foo.() syntax.
21594 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21595 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21596 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21597 }
21598
21599 switch (PM_NODE_TYPE(node)) {
21600 case PM_RESCUE_MODIFIER_NODE: {
21602 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21603 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21604 }
21605 break;
21606 }
21607 case PM_AND_NODE: {
21608 pm_and_node_t *cast = (pm_and_node_t *) node;
21609 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21610 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21611 }
21612 break;
21613 }
21614 case PM_OR_NODE: {
21615 pm_or_node_t *cast = (pm_or_node_t *) node;
21616 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21617 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21618 }
21619 break;
21620 }
21621 default:
21622 break;
21623 }
21624
21625 pm_token_t message;
21626
21627 switch (parser->current.type) {
21628 case PM_CASE_OPERATOR:
21629 case PM_CASE_KEYWORD:
21630 case PM_TOKEN_CONSTANT:
21631 case PM_TOKEN_IDENTIFIER:
21632 case PM_TOKEN_METHOD_NAME: {
21633 parser_lex(parser);
21634 message = parser->previous;
21635 break;
21636 }
21637 default: {
21638 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
21639 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21640 }
21641 }
21642
21643 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21644 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21645
21646 if (
21647 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21648 arguments.arguments == NULL &&
21649 arguments.opening_loc.length == 0 &&
21650 match1(parser, PM_TOKEN_COMMA)
21651 ) {
21652 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21653 } else {
21654 return UP(call);
21655 }
21656 }
21657 case PM_TOKEN_DOT_DOT:
21658 case PM_TOKEN_DOT_DOT_DOT: {
21659 parser_lex(parser);
21660
21661 pm_node_t *right = NULL;
21662 if (token_begins_expression_p(parser->current.type)) {
21663 right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21664 }
21665
21666 return UP(pm_range_node_create(parser, node, &token, right));
21667 }
21668 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21669 pm_token_t keyword = parser->current;
21670 parser_lex(parser);
21671
21672 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21673 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21674 }
21675 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21676 pm_token_t keyword = parser->current;
21677 parser_lex(parser);
21678
21679 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21680 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21681 }
21682 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21683 parser_lex(parser);
21684 pm_statements_node_t *statements = pm_statements_node_create(parser);
21685 pm_statements_node_body_append(parser, statements, node, true);
21686
21687 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21688 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21689 }
21690 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21691 parser_lex(parser);
21692 pm_statements_node_t *statements = pm_statements_node_create(parser);
21693 pm_statements_node_body_append(parser, statements, node, true);
21694
21695 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21696 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21697 }
21698 case PM_TOKEN_QUESTION_MARK: {
21699 context_push(parser, PM_CONTEXT_TERNARY);
21700 pm_node_list_t current_block_exits = { 0 };
21701 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21702
21703 pm_token_t qmark = parser->current;
21704 parser_lex(parser);
21705
21706 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21707
21708 if (parser->recovering) {
21709 // If parsing the true expression of this ternary resulted in a syntax
21710 // error that we can recover from, then we're going to put missing nodes
21711 // and tokens into the remaining places. We want to be sure to do this
21712 // before the `expect` function call to make sure it doesn't
21713 // accidentally move past a ':' token that occurs after the syntax
21714 // error.
21715 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21716 pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21717
21718 context_pop(parser);
21719 pop_block_exits(parser, previous_block_exits);
21720 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21721 }
21722
21723 accept1(parser, PM_TOKEN_NEWLINE);
21724 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21725
21726 pm_token_t colon = parser->previous;
21727 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21728
21729 context_pop(parser);
21730 pop_block_exits(parser, previous_block_exits);
21731 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21732 }
21733 case PM_TOKEN_COLON_COLON: {
21734 parser_lex(parser);
21735 pm_token_t delimiter = parser->previous;
21736
21737 switch (parser->current.type) {
21738 case PM_TOKEN_CONSTANT: {
21739 parser_lex(parser);
21740 pm_node_t *path;
21741
21742 if (
21743 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21744 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21745 ) {
21746 // If we have a constant immediately following a '::' operator, then
21747 // this can either be a constant path or a method call, depending on
21748 // what follows the constant.
21749 //
21750 // If we have parentheses, then this is a method call. That would
21751 // look like Foo::Bar().
21752 pm_token_t message = parser->previous;
21753 pm_arguments_t arguments = { 0 };
21754
21755 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21756 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21757 } else {
21758 // Otherwise, this is a constant path. That would look like Foo::Bar.
21759 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21760 }
21761
21762 // If this is followed by a comma then it is a multiple assignment.
21763 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21764 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21765 }
21766
21767 return path;
21768 }
21769 case PM_CASE_OPERATOR:
21770 case PM_CASE_KEYWORD:
21771 case PM_TOKEN_IDENTIFIER:
21772 case PM_TOKEN_METHOD_NAME: {
21773 parser_lex(parser);
21774 pm_token_t message = parser->previous;
21775
21776 // If we have an identifier following a '::' operator, then it is for
21777 // sure a method call.
21778 pm_arguments_t arguments = { 0 };
21779 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21780 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21781
21782 // If this is followed by a comma then it is a multiple assignment.
21783 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21784 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21785 }
21786
21787 return UP(call);
21788 }
21789 case PM_TOKEN_PARENTHESIS_LEFT: {
21790 // If we have a parenthesis following a '::' operator, then it is the
21791 // method call shorthand. That would look like Foo::(bar).
21792 pm_arguments_t arguments = { 0 };
21793 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21794
21795 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21796 }
21797 default: {
21798 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21799 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21800 }
21801 }
21802 }
21803 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21804 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21805 parser_lex(parser);
21806 accept1(parser, PM_TOKEN_NEWLINE);
21807
21808 pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21809 context_pop(parser);
21810
21811 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21812 }
21813 case PM_TOKEN_BRACKET_LEFT: {
21814 parser_lex(parser);
21815
21816 pm_arguments_t arguments = { 0 };
21817 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21818
21819 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21820 pm_accepts_block_stack_push(parser, true);
21821 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
21822 pm_accepts_block_stack_pop(parser);
21823 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21824 }
21825
21826 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21827
21828 // If we have a comma after the closing bracket then this is a multiple
21829 // assignment and we should parse the targets.
21830 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21831 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21832 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21833 }
21834
21835 // If we're at the end of the arguments, we can now check if there is a
21836 // block node that starts with a {. If there is, then we can parse it and
21837 // add it to the arguments.
21838 pm_block_node_t *block = NULL;
21839 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21840 block = parse_block(parser, (uint16_t) (depth + 1));
21841 pm_arguments_validate_block(parser, &arguments, block);
21842 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21843 block = parse_block(parser, (uint16_t) (depth + 1));
21844 }
21845
21846 if (block != NULL) {
21847 if (arguments.block != NULL) {
21848 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21849 if (arguments.arguments == NULL) {
21850 arguments.arguments = pm_arguments_node_create(parser);
21851 }
21852 pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
21853 }
21854
21855 arguments.block = UP(block);
21856 }
21857
21858 return UP(pm_call_node_aref_create(parser, node, &arguments));
21859 }
21860 case PM_TOKEN_KEYWORD_IN: {
21861 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21862 parser->pattern_matching_newlines = true;
21863
21864 pm_token_t operator = parser->current;
21865 parser->command_start = false;
21866 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21867 parser_lex(parser);
21868
21869 pm_constant_id_list_t captures = { 0 };
21870 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21871
21872 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21873
21874 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21875 }
21876 case PM_TOKEN_EQUAL_GREATER: {
21877 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21878 parser->pattern_matching_newlines = true;
21879
21880 pm_token_t operator = parser->current;
21881 parser->command_start = false;
21882 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21883 parser_lex(parser);
21884
21885 pm_constant_id_list_t captures = { 0 };
21886 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21887
21888 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21889
21890 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21891 }
21892 default:
21893 assert(false && "unreachable");
21894 return NULL;
21895 }
21896}
21897
21898#undef PM_PARSE_PATTERN_SINGLE
21899#undef PM_PARSE_PATTERN_TOP
21900#undef PM_PARSE_PATTERN_MULTI
21901
21914static bool
21915parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
21916 pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
21917
21918 switch (PM_NODE_TYPE(node)) {
21919 case PM_MULTI_WRITE_NODE:
21920 case PM_RETURN_NODE:
21921 case PM_BREAK_NODE:
21922 case PM_NEXT_NODE:
21923 return left > PM_BINDING_POWER_MODIFIER;
21924 case PM_CLASS_VARIABLE_WRITE_NODE:
21925 case PM_CONSTANT_PATH_WRITE_NODE:
21926 case PM_CONSTANT_WRITE_NODE:
21927 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21928 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21929 case PM_LOCAL_VARIABLE_WRITE_NODE:
21930 return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
21931 case PM_CALL_NODE: {
21932 // Calls with an implicit array on the right-hand side are
21933 // statements and can only be followed by modifiers.
21934 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
21935 return left > PM_BINDING_POWER_MODIFIER;
21936 }
21937
21938 // Command-style calls (including block commands like
21939 // `foo bar do end`) can only be followed by composition
21940 // (and/or) and modifier (if/unless/etc.) operators.
21941 if (pm_command_call_value_p(node)) {
21942 return left > PM_BINDING_POWER_COMPOSITION;
21943 }
21944
21945 // A block call (command with do-block, or any call chained
21946 // from one) can only be followed by call chaining (., ::,
21947 // &.), composition (and/or), and modifier operators.
21948 if (pm_block_call_p(node)) {
21949 return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
21950 }
21951
21952 return false;
21953 }
21954 case PM_SUPER_NODE:
21955 case PM_YIELD_NODE:
21956 // Command-style super/yield (without parens) can only be followed
21957 // by composition and modifier operators.
21958 if (pm_command_call_value_p(node)) {
21959 return left > PM_BINDING_POWER_COMPOSITION;
21960 }
21961 return false;
21962 case PM_DEF_NODE:
21963 // An endless method whose body is a command-style call (e.g.,
21964 // `def f = foo bar`) is a command assignment and can only be
21965 // followed by modifiers.
21966 return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
21967 case PM_RESCUE_MODIFIER_NODE:
21968 // A rescue modifier whose handler is a pattern match (=> or in)
21969 // produces a statement and cannot be followed by operators above
21970 // the modifier level.
21971 if (left > PM_BINDING_POWER_MODIFIER) {
21973 pm_node_t *rescue_expression = cast->rescue_expression;
21974 return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
21975 }
21976 return false;
21977 default:
21978 return false;
21979 }
21980}
21981
21990static pm_node_t *
21991parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
21992 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21993 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21994 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21995 }
21996
21997 pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
21998
21999 // Some prefix nodes are statements and can only be followed by modifiers
22000 // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
22001 // here before entering the infix loop.
22002 switch (PM_NODE_TYPE(node)) {
22003 case PM_ERROR_RECOVERY_NODE:
22004 return node;
22005 case PM_PRE_EXECUTION_NODE:
22006 return node;
22007 case PM_POST_EXECUTION_NODE:
22008 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22009 case PM_ALIAS_METHOD_NODE:
22010 case PM_UNDEF_NODE:
22011 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22012 return node;
22013 }
22014 break;
22015 case PM_CALL_NODE:
22016 case PM_SUPER_NODE:
22017 case PM_YIELD_NODE:
22018 case PM_DEF_NODE:
22019 if (parse_expression_terminator(parser, node)) {
22020 return node;
22021 }
22022 break;
22023 case PM_SYMBOL_NODE:
22024 if (pm_symbol_node_label_p(parser, node)) {
22025 return node;
22026 }
22027 break;
22028 default:
22029 break;
22030 }
22031
22032 // Look and see if the next token can be parsed as an infix operator. If it
22033 // can, then we'll parse it using parse_expression_infix.
22034 pm_binding_powers_t current_binding_powers;
22035 pm_token_type_t current_token_type;
22036
22037 while (
22038 current_token_type = parser->current.type,
22039 current_binding_powers = pm_binding_powers[current_token_type],
22040 binding_power <= current_binding_powers.left &&
22041 current_binding_powers.binary
22042 ) {
22043 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
22044 if (parse_expression_terminator(parser, node)) return node;
22045
22046 // If the operator is nonassoc and we should not be able to parse the
22047 // upcoming infix operator, break.
22048 if (current_binding_powers.nonassoc) {
22049 // If this is a non-assoc operator and we are about to parse the
22050 // exact same operator, then we need to add an error.
22051 if (match1(parser, current_token_type)) {
22052 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22053 break;
22054 }
22055
22056 // If this is an endless range, then we need to reject a couple of
22057 // additional operators because it violates the normal operator
22058 // precedence rules. Those patterns are:
22059 //
22060 // 1.. & 2
22061 // 1.. * 2
22062 //
22063 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22064 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22065 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22066 break;
22067 }
22068
22069 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22070 break;
22071 }
22072 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22073 break;
22074 }
22075 }
22076
22077 if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
22078 // A command-style method call is only accepted on method chains.
22079 // Thus, we check whether the parsed node can continue method chains.
22080 // The method chain can continue if the parsed node is one of the following five kinds:
22081 // (1) index access: foo[1]
22082 // (2) attribute access: foo.bar
22083 // (3) method call with parenthesis: foo.bar(1)
22084 // (4) method call with a block: foo.bar do end
22085 // (5) constant path: foo::Bar
22086 switch (node->type) {
22087 case PM_CALL_NODE: {
22088 pm_call_node_t *cast = (pm_call_node_t *)node;
22089 if (
22090 // (1) foo[1]
22091 !(
22092 cast->call_operator_loc.length == 0 &&
22093 cast->message_loc.length > 0 &&
22094 parser->start[cast->message_loc.start] == '[' &&
22095 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
22096 ) &&
22097 // (2) foo.bar
22098 !(
22099 cast->call_operator_loc.length > 0 &&
22100 cast->arguments == NULL &&
22101 cast->block == NULL &&
22102 cast->opening_loc.length == 0
22103 ) &&
22104 // (3) foo.bar(1)
22105 !(
22106 cast->call_operator_loc.length > 0 &&
22107 cast->opening_loc.length > 0
22108 ) &&
22109 // (4) foo.bar do end
22110 !(
22111 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22112 )
22113 ) {
22114 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22115 }
22116 break;
22117 }
22118 // (5) foo::Bar
22119 case PM_CONSTANT_PATH_NODE:
22120 break;
22121 default:
22122 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22123 break;
22124 }
22125 }
22126
22127 if (context_terminator(parser->current_context->context, &parser->current)) {
22128 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
22129 if (
22130 !next_binding_powers.binary ||
22131 binding_power > next_binding_powers.left ||
22132 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
22133 ) {
22134 return node;
22135 }
22136 }
22137 }
22138
22139 return node;
22140}
22141
22146static pm_statements_node_t *
22147wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22148 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22149 if (statements == NULL) {
22150 statements = pm_statements_node_create(parser);
22151 }
22152
22153 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22154 pm_arguments_node_arguments_append(
22155 parser->arena,
22156 arguments,
22157 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
22158 );
22159
22160 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
22161 parser,
22162 arguments,
22163 pm_parser_constant_id_constant(parser, "print", 5)
22164 )), true);
22165 }
22166
22167 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22168 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22169 if (statements == NULL) {
22170 statements = pm_statements_node_create(parser);
22171 }
22172
22173 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22174 pm_arguments_node_arguments_append(
22175 parser->arena,
22176 arguments,
22177 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
22178 );
22179
22180 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22181 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
22182
22183 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22184 parser,
22185 pm_parser_constant_id_constant(parser, "$F", 2),
22186 UP(call)
22187 );
22188
22189 pm_statements_node_body_prepend(parser->arena, statements, UP(write));
22190 }
22191
22192 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22193 pm_arguments_node_arguments_append(
22194 parser->arena,
22195 arguments,
22196 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
22197 );
22198
22199 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22200 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22201 pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
22202 parser,
22203 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
22204 NULL,
22205 UP(pm_true_node_synthesized_create(parser))
22206 )));
22207
22208 pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
22209 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22210 }
22211
22212 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22213 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
22214 parser,
22215 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
22216 statements
22217 )), true);
22218
22219 statements = wrapped_statements;
22220 }
22221
22222 return statements;
22223}
22224
22228static pm_node_t *
22229parse_program(pm_parser_t *parser) {
22230 // If the current scope is NULL, then we want to push a new top level scope.
22231 // The current scope could exist in the event that we are parsing an eval
22232 // and the user has passed into scopes that already exist.
22233 if (parser->current_scope == NULL) {
22234 pm_parser_scope_push(parser, true);
22235 }
22236
22237 pm_node_list_t current_block_exits = { 0 };
22238 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22239
22240 parser_lex(parser);
22241 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22242
22243 if (statements != NULL && !parser->parsing_eval) {
22244 // If we have statements, then the top-level statement should be
22245 // explicitly checked as well. We have to do this here because
22246 // everywhere else we check all but the last statement.
22247 assert(statements->body.size > 0);
22248 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22249 }
22250
22251 pm_constant_id_list_t locals;
22252 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22253 pm_parser_scope_pop(parser);
22254
22255 // At the top level, see if we need to wrap the statements in a program
22256 // node with a while loop based on the options.
22257 if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
22258 statements = wrap_statements(parser, statements);
22259 } else {
22260 flush_block_exits(parser, previous_block_exits);
22261 }
22262
22263 // If this is an empty file, then we're still going to parse all of the
22264 // statements in order to gather up all of the comments and such. Here we'll
22265 // correct the location information.
22266 if (statements == NULL) {
22267 statements = pm_statements_node_create(parser);
22268 statements->base.location = (pm_location_t) { 0 };
22269 }
22270
22271 return UP(pm_program_node_create(parser, &locals, statements));
22272}
22273
22274/******************************************************************************/
22275/* External functions */
22276/******************************************************************************/
22277
/**
 * Find the first occurrence of the NUL-terminated string little within the
 * first big_length bytes of big. Unlike the BSD strnstr, the search is bounded
 * only by big_length: a NUL byte inside big does not end it.
 *
 * @param big The buffer to search; it does not need to be NUL-terminated.
 * @param little The NUL-terminated string to look for.
 * @param big_length The number of bytes of big that may be examined.
 * @return A pointer to the first match inside big, big itself when little is
 *     empty, or NULL when there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the very start, as with strstr(). Handling
    // it up front also avoids reading big[big_length] in the loop below.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Rejecting it here
    // keeps the end pointer computed below inside the buffer; otherwise it
    // would point before big, which is undefined behavior.
    if (little_length > big_length) return NULL;

    // The last position at which a full-length match could still start.
    const char *max = big + (big_length - little_length);

    for (; big <= max; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22297
22298#ifdef _WIN32
22299#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22300#else
22306static void
22307pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22308 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22309 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
22310 }
22311}
22312#endif
22313
22318static void
22319pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22320 const char *switches = pm_strnstr(engine, " -", length);
22321 if (switches == NULL) return;
22322
22323 pm_options_t next_options = *options;
22324 options->shebang_callback(
22325 &next_options,
22326 (const uint8_t *) (switches + 1),
22327 length - ((size_t) (switches - engine)) - 1,
22328 options->shebang_callback_data
22329 );
22330
22331 size_t encoding_length;
22332 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22333 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22334 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22335 }
22336
22337 parser->command_line = next_options.command_line;
22338 parser->frozen_string_literal = next_options.frozen_string_literal;
22339}
22340
22344void
22345pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22346 assert(arena != NULL);
22347 assert(source != NULL);
22348
22349 *parser = (pm_parser_t) {
22350 .arena = arena,
22351 .metadata_arena = { 0 },
22352 .node_id = 0,
22353 .lex_state = PM_LEX_STATE_BEG,
22354 .enclosure_nesting = 0,
22355 .lambda_enclosure_nesting = -1,
22356 .brace_nesting = 0,
22357 .do_loop_stack = 0,
22358 .accepts_block_stack = 0,
22359 .lex_modes = {
22360 .index = 0,
22361 .stack = {{ .mode = PM_LEX_DEFAULT }},
22362 .current = &parser->lex_modes.stack[0],
22363 },
22364 .start = source,
22365 .end = source + size,
22366 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22367 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22368 .next_start = NULL,
22369 .heredoc_end = NULL,
22370 .data_loc = { 0 },
22371 .comment_list = { 0 },
22372 .magic_comment_list = { 0 },
22373 .warning_list = { 0 },
22374 .error_list = { 0 },
22375 .current_scope = NULL,
22376 .current_context = NULL,
22377 .encoding = PM_ENCODING_UTF_8_ENTRY,
22378 .encoding_changed_callback = NULL,
22379 .encoding_comment_start = source,
22380 .lex_callback = { 0 },
22381 .filepath = { 0 },
22382 .constant_pool = { 0 },
22383 .line_offsets = { 0 },
22384 .integer = { 0 },
22385 .current_string = PM_STRING_EMPTY,
22386 .start_line = 1,
22387 .explicit_encoding = NULL,
22388 .command_line = 0,
22389 .parsing_eval = false,
22390 .partial_script = false,
22391 .command_start = true,
22392 .recovering = false,
22393 .continuable = true,
22394 .encoding_locked = false,
22395 .encoding_changed = false,
22396 .pattern_matching_newlines = false,
22397 .in_keyword_arg = false,
22398 .current_block_exits = NULL,
22399 .semantic_token_seen = false,
22400 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22401 .warn_mismatched_indentation = true
22402 };
22403
22404 /* Pre-size the arenas based on input size to reduce the number of block
22405 * allocations (and the kernel page zeroing they trigger). The ratios were
22406 * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
22407 * The reserve call is a no-op when the capacity is at or below the default
22408 * arena block size, so small inputs don't waste an extra allocation. */
22409 if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
22410 if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
22411
22412 /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
22413 * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
22414 * We use 120 as a balance between over-allocation waste and resize
22415 * frequency. Resizes are cheap with arena allocation, so we lean toward
22416 * under-estimating. */
22417 uint32_t constant_size = ((uint32_t) size) / 120;
22418 pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22419
22420 /* Initialize the line offset list. Similar to the constant pool, we are
22421 * going to estimate the number of newlines that we will need based on the
22422 * size of the input. */
22423 size_t newline_size = size / 22;
22424 pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
22425
22426 // If options were provided to this parse, establish them here.
22427 if (options != NULL) {
22428 // filepath option
22429 parser->filepath = options->filepath;
22430
22431 // line option
22432 parser->start_line = options->line;
22433
22434 // encoding option
22435 size_t encoding_length = pm_string_length(&options->encoding);
22436 if (encoding_length > 0) {
22437 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22438 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22439 }
22440
22441 // encoding_locked option
22442 parser->encoding_locked = options->encoding_locked;
22443
22444 // frozen_string_literal option
22445 parser->frozen_string_literal = options->frozen_string_literal;
22446
22447 // command_line option
22448 parser->command_line = options->command_line;
22449
22450 // version option
22451 parser->version = options->version;
22452
22453 // partial_script
22454 parser->partial_script = options->partial_script;
22455
22456 // scopes option
22457 parser->parsing_eval = options->scopes_count > 0;
22458 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22459
22460 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22461 const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
22462 pm_parser_scope_push(parser, scope_index == 0);
22463
22464 // Scopes given from the outside are not allowed to have numbered
22465 // parameters.
22466 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22467
22468 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22469 const pm_string_t *local = pm_options_scope_local(scope, local_index);
22470
22471 const uint8_t *source = pm_string_source(local);
22472 size_t length = pm_string_length(local);
22473
22474 uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
22475 memcpy(allocated, source, length);
22476 pm_parser_local_add_owned(parser, allocated, length);
22477 }
22478 }
22479 }
22480
22481 // Now that we have established the user-provided options, check if
22482 // a version was given and parse as the latest version otherwise.
22483 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22484 parser->version = PM_OPTIONS_VERSION_LATEST;
22485 }
22486
22487 pm_accepts_block_stack_push(parser, true);
22488
22489 // Skip past the UTF-8 BOM if it exists.
22490 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22491 parser->current.end += 3;
22492 parser->encoding_comment_start += 3;
22493
22494 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22495 parser->encoding = PM_ENCODING_UTF_8_ENTRY;
22496 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22497 }
22498 }
22499
22500 // If the -x command line flag is set, or the first shebang of the file does
22501 // not include "ruby", then we'll search for a shebang that does include
22502 // "ruby" and start parsing from there.
22503 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22504
22505 // If the first two bytes of the source are a shebang, then we will do a bit
22506 // of extra processing.
22507 //
22508 // First, we'll indicate that the encoding comment is at the end of the
22509 // shebang. This means that when a shebang is present the encoding comment
22510 // can begin on the second line.
22511 //
22512 // Second, we will check if the shebang includes "ruby". If it does, then we
22513 // will start parsing from there. We will also potentially warn the
22514 // user if there is a carriage return at the end of the shebang. We will
22515 // also potentially call the shebang callback if this is the main script to
22516 // allow the caller to parse the shebang and find any command-line options.
22517 // If the shebang does not include "ruby" and this is the main script being
22518 // parsed, then we will start searching the file for a shebang that does
22519 // contain "ruby" as if -x were passed on the command line.
22520 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22521 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22522
22523 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22524 const char *engine;
22525
22526 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22527 if (newline != NULL) {
22528 parser->encoding_comment_start = newline + 1;
22529
22530 if (options == NULL || options->main_script) {
22531 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22532 }
22533 }
22534
22535 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22536 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22537 }
22538
22539 search_shebang = false;
22540 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22541 search_shebang = true;
22542 }
22543 }
22544
22545 // Here we're going to find the first shebang that includes "ruby" and start
22546 // parsing from there.
22547 if (search_shebang) {
22548 // If a shebang that includes "ruby" is not found, then we're going to add
22549 // a load error to the list of errors on the parser.
22550 bool found_shebang = false;
22551
22552 // This is going to point to the start of each line as we check it.
22553 // We'll maintain a moving window looking at each line as they come.
22554 const uint8_t *cursor = parser->start;
22555
22556 // The newline pointer points to the end of the current line that we're
22557 // considering. If it is NULL, then we're at the end of the file.
22558 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22559
22560 while (newline != NULL) {
22561 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
22562
22563 cursor = newline + 1;
22564 newline = next_newline(cursor, parser->end - cursor);
22565
22566 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22567 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22568 const char *engine;
22569 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22570 found_shebang = true;
22571
22572 if (newline != NULL) {
22573 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22574 parser->encoding_comment_start = newline + 1;
22575 }
22576
22577 if (options != NULL && options->shebang_callback != NULL) {
22578 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22579 }
22580
22581 break;
22582 }
22583 }
22584 }
22585
22586 if (found_shebang) {
22587 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22588 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22589 } else {
22590 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22591 pm_line_offset_list_clear(&parser->line_offsets);
22592 }
22593 }
22594
22595 // The encoding comment can start after any amount of inline whitespace, so
22596 // here we'll advance it to the first non-inline-whitespace character so
22597 // that it is ready for future comparisons.
22598 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22599}
22600
22609pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
22610 pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
22611 if (parser == NULL) abort();
22612
22613 pm_parser_init(arena, parser, source, size, options);
22614 return parser;
22615}
22616
22620void
22621pm_parser_cleanup(pm_parser_t *parser) {
22622 pm_string_cleanup(&parser->filepath);
22623 pm_arena_cleanup(&parser->metadata_arena);
22624
22625 while (parser->current_scope != NULL) {
22626 // Normally, popping the scope doesn't free the locals since it is
22627 // assumed that ownership has transferred to the AST. However if we have
22628 // scopes while we're freeing the parser, it's likely they came from
22629 // eval scopes and we need to free them explicitly here.
22630 pm_parser_scope_pop(parser);
22631 }
22632
22633 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22634 lex_mode_pop(parser);
22635 }
22636}
22637
22641void
22643 pm_parser_cleanup(parser);
22644 xfree_sized(parser, sizeof(pm_parser_t));
22645}
22646
22652static bool
22653pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
22654 switch (diag_id) {
22655 case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
22656 case PM_ERR_BEGIN_UPCASE_BRACE:
22657 case PM_ERR_CLASS_VARIABLE_BARE:
22658 case PM_ERR_END_UPCASE_BRACE:
22659 case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
22660 case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
22661 case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
22662 case PM_ERR_EXPRESSION_NOT_WRITABLE:
22663 case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
22664 case PM_ERR_FLOAT_PARSE:
22665 case PM_ERR_GLOBAL_VARIABLE_BARE:
22666 case PM_ERR_HASH_KEY:
22667 case PM_ERR_HEREDOC_IDENTIFIER:
22668 case PM_ERR_INSTANCE_VARIABLE_BARE:
22669 case PM_ERR_INVALID_BLOCK_EXIT:
22670 case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
22671 case PM_ERR_INVALID_FLOAT_EXPONENT:
22672 case PM_ERR_INVALID_NUMBER_BINARY:
22673 case PM_ERR_INVALID_NUMBER_DECIMAL:
22674 case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
22675 case PM_ERR_INVALID_NUMBER_OCTAL:
22676 case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
22677 case PM_ERR_NO_LOCAL_VARIABLE:
22678 case PM_ERR_PARAMETER_ORDER:
22679 case PM_ERR_STATEMENT_UNDEF:
22680 case PM_ERR_VOID_EXPRESSION:
22681 return true;
22682 default:
22683 return false;
22684 }
22685}
22686
22720static void
22721pm_parse_continuable(pm_parser_t *parser) {
22722 // If there are no errors then there is nothing to continue.
22723 if (parser->error_list.size == 0) {
22724 parser->continuable = false;
22725 return;
22726 }
22727
22728 if (!parser->continuable) return;
22729
22730 size_t source_length = (size_t) (parser->end - parser->start);
22731
22732 // First pass: check if there are any non-stray, non-fatal errors.
22733 bool has_non_stray_error = false;
22734 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22735 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
22736 has_non_stray_error = true;
22737 break;
22738 }
22739 }
22740
22741 // Second pass: check each error. We track the minimum source position
22742 // among non-stray, non-fatal errors seen so far in list order, which
22743 // lets us detect cascade stray tokens.
22744 size_t non_stray_min_start = SIZE_MAX;
22745
22746 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22747 size_t error_start = (size_t) error->location.start;
22748 size_t error_end = error_start + (size_t) error->location.length;
22749 bool at_eof = error_end >= source_length;
22750
22751 // Fatal errors are non-continuable unless they occur at EOF.
22752 if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
22753 parser->continuable = false;
22754 return;
22755 }
22756
22757 // Track non-stray, non-fatal error positions in list order.
22758 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
22759 error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
22760 if (error_start < non_stray_min_start) non_stray_min_start = error_start;
22761 continue;
22762 }
22763
22764 // This is a stray token. Determine if it is a cascade effect
22765 // of a preceding error or genuinely stray.
22766
22767 // Rule (a): a non-stray error was seen earlier in the list at a
22768 // strictly earlier position — this stray is a cascade effect.
22769 if (non_stray_min_start < error_start) continue;
22770
22771 // Rule (b): this stray is at EOF with valid code before it.
22772 // Single-byte stray tokens at EOF (like `\` for line continuation)
22773 // are likely truncated tokens. Multi-byte stray tokens (like the
22774 // keyword `end`) need additional evidence that they are cascade
22775 // effects (i.e. non-stray errors exist elsewhere).
22776 if (at_eof && error_start > 0) {
22777 // Exception: closing delimiters at EOF are genuinely stray.
22778 if (error->location.length == 1) {
22779 const uint8_t *byte = parser->start + error_start;
22780 if (*byte == ')' || *byte == ']' || *byte == '}') {
22781 parser->continuable = false;
22782 return;
22783 }
22784
22785 // Single-byte non-delimiter stray at EOF: cascade.
22786 continue;
22787 }
22788
22789 // Multi-byte stray at EOF: cascade only if there are
22790 // non-stray errors (evidence of a preceding parse failure).
22791 if (has_non_stray_error) continue;
22792 }
22793
22794 // Rule (c): a stray `=` at the start of a line could be the
22795 // beginning of an embedded document (`=begin`). The remaining
22796 // bytes after `=` parse as an identifier, so the error is not
22797 // at EOF, but the construct is genuinely incomplete.
22798 if (error->location.length == 1) {
22799 const uint8_t *byte = parser->start + error_start;
22800 if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
22801 }
22802
22803 // This stray token is genuinely non-continuable.
22804 parser->continuable = false;
22805 return;
22806 }
22807}
22808
22812pm_node_t *
22814 pm_node_t *node = parse_program(parser);
22815 pm_parse_continuable(parser);
22816 return node;
22817}
22818
22825pm_node_t *
22826pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
22827 bool eof = pm_source_stream_read(source);
22828
22829 pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22830 pm_node_t *node = pm_parse(tmp);
22831
22832 while (!eof && tmp->error_list.size > 0) {
22833 eof = pm_source_stream_read(source);
22834
22835 pm_parser_free(tmp);
22836 pm_arena_cleanup(arena);
22837
22838 tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22839 node = pm_parse(tmp);
22840 }
22841
22842 *parser = tmp;
22843 return node;
22844}
22845
22846#undef PM_CASE_KEYWORD
22847#undef PM_CASE_OPERATOR
22848#undef PM_CASE_WRITABLE
22849#undef PM_STRING_EMPTY
22850
22851// We optionally support serializing to a binary string. For systems that don't
22852// want or need this functionality, it can be turned off with the
22853// PRISM_EXCLUDE_SERIALIZATION define.
22854#ifndef PRISM_EXCLUDE_SERIALIZATION
22855
22856static PRISM_INLINE void
22857pm_serialize_header(pm_buffer_t *buffer) {
22858 pm_buffer_append_string(buffer, "PRISM", 5);
22859 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22860 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22861 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22862 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22863}
22864
22868void
22869pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22870 pm_serialize_header(buffer);
22871 pm_serialize_content(parser, node, buffer);
22872 pm_buffer_append_byte(buffer, '\0');
22873}
22874
22879void
22880pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22881 pm_options_t options = { 0 };
22882 pm_options_read(&options, data);
22883
22884 pm_arena_t arena = { 0 };
22885 pm_parser_t parser;
22886 pm_parser_init(&arena, &parser, source, size, &options);
22887
22888 pm_node_t *node = pm_parse(&parser);
22889
22890 pm_serialize_header(buffer);
22891 pm_serialize_content(&parser, node, buffer);
22892 pm_buffer_append_byte(buffer, '\0');
22893
22894 pm_parser_cleanup(&parser);
22895 pm_arena_cleanup(&arena);
22896 pm_options_cleanup(&options);
22897}
22898
22903void
22904pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
22905 pm_arena_t arena = { 0 };
22906 pm_parser_t *parser;
22907 pm_options_t options = { 0 };
22908 pm_options_read(&options, data);
22909
22910 pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
22911 pm_serialize_header(buffer);
22912 pm_serialize_content(parser, node, buffer);
22913 pm_buffer_append_byte(buffer, '\0');
22914
22915 pm_parser_free(parser);
22916 pm_arena_cleanup(&arena);
22917 pm_options_cleanup(&options);
22918}
22919
22923void
22924pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22925 pm_options_t options = { 0 };
22926 pm_options_read(&options, data);
22927
22928 pm_arena_t arena = { 0 };
22929 pm_parser_t parser;
22930 pm_parser_init(&arena, &parser, source, size, &options);
22931
22932 pm_parse(&parser);
22933 pm_serialize_header(buffer);
22934 pm_serialize_encoding(parser.encoding, buffer);
22935 pm_buffer_append_varsint(buffer, parser.start_line);
22936 pm_serialize_comment_list(&parser.comment_list, buffer);
22937
22938 pm_parser_cleanup(&parser);
22939 pm_arena_cleanup(&arena);
22940 pm_options_cleanup(&options);
22941}
22942
22943#endif
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition align.h:15
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition comments.h:18
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
A header file that defines macros to exclude certain features of the prism library.
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition fallthrough.h:15
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
int len
Length of the buffer.
Definition io.h:8
#define PRISM_INLINE
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition inline.h:12
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:96
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should not be frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:42
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:37
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:102
PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1)
Allocate and initialize a parser with the given start and end pointers.
Definition prism.c:22609
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1)
Free both the memory held by the given parser and the parser itself.
Definition prism.c:22642
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1)
Initiate the parser with the given parser.
Definition prism.c:22813
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:18
The version of the Prism library.
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:29
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:24
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:19
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:14
The functions related to serializing the AST to a binary format.
Functions for parsing streams.
AndNode.
Definition ast.h:1291
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1306
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1319
ArgumentsNode.
Definition ast.h:1351
pm_node_t base
The embedded base node.
Definition ast.h:1353
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1363
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1763
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1774
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1777
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1765
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1768
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1771
ArrayNode.
Definition ast.h:1381
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1390
ArrayPatternNode.
Definition ast.h:1441
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1459
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1499
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1509
AssocNode.
Definition ast.h:1524
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1555
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1542
BeginNode.
Definition ast.h:1647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1689
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1699
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1669
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1679
pm_node_t base
The embedded base node.
Definition ast.h:1649
This struct represents a set of binding powers used for a given token.
Definition prism.c:12445
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12453
pm_binding_power_t left
The left binding power.
Definition prism.c:12447
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12459
pm_binding_power_t right
The right binding power.
Definition prism.c:12450
BlockLocalVariableNode.
Definition ast.h:1764
BlockNode.
Definition ast.h:1791
BlockParametersNode.
Definition ast.h:1919
CallNode.
Definition ast.h:2143
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2204
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2224
pm_constant_id_t name
CallNode::name.
Definition ast.h:2184
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2214
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2237
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2174
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2194
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2247
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2161
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2600
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2610
CaseNode.
Definition ast.h:2647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2679
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2669
ClassVariableReadNode.
Definition ast.h:2936
ClassVariableTargetNode.
Definition ast.h:2964
ClassVariableWriteNode.
Definition ast.h:2986
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3195
ConstantPathTargetNode.
Definition ast.h:3330
ConstantReadNode.
Definition ast.h:3423
ConstantTargetNode.
Definition ast.h:3451
ConstantWriteNode.
Definition ast.h:3473
DefNode.
Definition ast.h:3535
pm_location_t equal_loc
DefNode::equal_loc.
Definition ast.h:3592
PM_NODE_ALIGNAS struct pm_node * body
DefNode::body.
Definition ast.h:3562
ElseNode.
Definition ast.h:3649
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3661
EnsureNode.
Definition ast.h:3744
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3756
FindPatternNode.
Definition ast.h:3823
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3887
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3835
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3900
FlipFlopNode.
Definition ast.h:3918
FloatNode.
Definition ast.h:3950
double value
FloatNode::value.
Definition ast.h:3959
pm_node_t base
The embedded base node.
Definition ast.h:3952
ForwardingParameterNode.
Definition ast.h:4083
GlobalVariableReadNode.
Definition ast.h:4256
GlobalVariableTargetNode.
Definition ast.h:4284
GlobalVariableWriteNode.
Definition ast.h:4306
HashNode.
Definition ast.h:4367
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4392
HashPatternNode.
Definition ast.h:4426
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4441
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4480
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4493
IfNode.
Definition ast.h:4514
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4573
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4592
ImaginaryNode.
Definition ast.h:4619
InNode.
Definition ast.h:4695
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4707
InstanceVariableReadNode.
Definition ast.h:5098
InstanceVariableTargetNode.
Definition ast.h:5126
InstanceVariableWriteNode.
Definition ast.h:5148
IntegerNode.
Definition ast.h:5215
pm_integer_t value
IntegerNode::value.
Definition ast.h:5224
pm_node_t base
The embedded base node.
Definition ast.h:5217
bool negative
Whether or not the integer is negative.
Definition integer.h:38
InterpolatedMatchLastLineNode.
Definition ast.h:5252
InterpolatedRegularExpressionNode.
Definition ast.h:5297
InterpolatedStringNode.
Definition ast.h:5333
pm_node_t base
The embedded base node.
Definition ast.h:5335
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5340
InterpolatedSymbolNode.
Definition ast.h:5365
InterpolatedXStringNode.
Definition ast.h:5397
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5404
pm_node_t base
The embedded base node.
Definition ast.h:5399
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5409
KeywordHashNode.
Definition ast.h:5466
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
LocalVariableReadNode.
Definition ast.h:5702
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5732
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5719
LocalVariableTargetNode.
Definition ast.h:5750
LocalVariableWriteNode.
Definition ast.h:5777
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5803
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5790
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:554
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:556
uint32_t length
The length of the location.
Definition ast.h:559
MatchLastLineNode.
Definition ast.h:5868
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6035
MultiTargetNode.
Definition ast.h:6102
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6159
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6119
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6169
MultiWriteNode.
Definition ast.h:6184
A list of nodes in the source, most often used for lists of children.
Definition ast.h:567
size_t size
The number of nodes in the list.
Definition ast.h:569
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:575
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1065
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1070
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1088
OptionalParameterNode.
Definition ast.h:6478
OrNode.
Definition ast.h:6515
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6543
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6530
ParametersNode.
Definition ast.h:6569
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6606
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6586
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6601
ParenthesesNode.
Definition ast.h:6624
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6631
RangeNode.
Definition ast.h:6854
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6883
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6869
RationalNode.
Definition ast.h:6911
pm_node_t base
The embedded base node.
Definition ast.h:6913
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6922
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9735
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9740
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9737
RegularExpressionNode.
Definition ast.h:6976
RequiredParameterNode.
Definition ast.h:7048
RescueModifierNode.
Definition ast.h:7070
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7087
RescueNode.
Definition ast.h:7107
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7144
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7134
SplatNode.
Definition ast.h:7397
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7409
StatementsNode.
Definition ast.h:7424
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7431
pm_node_t base
The embedded base node.
Definition ast.h:7426
StringNode.
Definition ast.h:7458
pm_node_t base
The embedded base node.
Definition ast.h:7460
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7480
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7470
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7475
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7465
A generic string type that can have various ownership semantics.
Definition stringy.h:18
const uint8_t * source
A pointer to the start of the string.
Definition stringy.h:20
size_t length
The length of the string in bytes of memory.
Definition stringy.h:23
enum pm_string_t::@116 type
The type of the string.
SuperNode.
Definition ast.h:7500
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
SuperNode::arguments.
Definition ast.h:7519
pm_location_t lparen_loc
SuperNode::lparen_loc.
Definition ast.h:7512
PM_NODE_ALIGNAS struct pm_node * block
SuperNode::block.
Definition ast.h:7529
SymbolNode.
Definition ast.h:7552
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7564
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7574
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated with the tokens.
Definition prism.c:9709
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9714
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9720
This struct represents a token in the Ruby source.
Definition ast.h:526
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:534
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:531
pm_token_type_t type
The type of the token.
Definition ast.h:528
UndefNode.
Definition ast.h:7606
UnlessNode.
Definition ast.h:7636
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7685
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7695
WhenNode.
Definition ast.h:7770
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7792
XStringNode.
Definition ast.h:7859
YieldNode.
Definition ast.h:7896
pm_location_t lparen_loc
YieldNode::lparen_loc.
Definition ast.h:7908
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
YieldNode::arguments.
Definition ast.h:7913
#define PRISM_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition unused.h:13